class is just bytes
A struct or class is the same as any other object. In C and C++ terminology, even an int is an object: http://en.cppreference.com/w/c/language/object. i.e. A contiguous block of bytes that you can memcpy around (except for non-POD types in C++).
- What's an ABI? The ABI is what nails down some stuff that the C and C++ standards leave "implementation dependent", so that all compilers for that ABI can make code that can call each other's functions.
What does a struct compile to?
// Plain (non-virtual) class used to show the raw object layout: two int
// members and two non-virtual member functions. Everything is private because
// `class` defaults to private access.
class foo {
int m_a; // first data member
int m_b; // second data member; the generated code for inc_b() accesses it at [rdi+4]
void inc_a(void){ m_a++; } // defined in-class
int inc_b(void); // declared here, defined out of class
};
// Out-of-class definition: post-increment, so it returns the old value of m_b
// and then stores the incremented value back into the object.
int foo::inc_b(void) { return m_b++; }
foo::inc_b(): # args: this in RDI
mov eax, DWORD PTR [rdi+4] # eax = this->m_b
lea edx, [rax+1] # edx = eax+1
mov DWORD PTR [rdi+4], edx # this->m_b = edx
ret
- the this pointer is passed as an implicit first argument
- m_b is stored at 4 bytes from the start of the struct/class.
What does a class with virtual functions compile to?
#include <iostream>
#include <memory>
// Polymorphic version of foo: it declares one virtual function, and gdb shows a
// hidden vtable pointer (_vptr.foo) as the first field of the object.
// NOTE(review): there is no virtual destructor. Nothing is deleted in this demo,
// but deleting a derived object through a foo* would be undefined behavior.
class foo {
public:
int m_a; // zero-initialized at [rbx+8] in the -O0 listing, i.e. right after the 8-byte VPTR
int m_b; // zero-initialized at [rbx+12] in the -O0 listing
void inc_a(void){ m_a++; } // non-virtual, defined in-class
void inc_b(void); // non-virtual, defined out of class
virtual void inc_v(void); // virtual: reached through the vtable; its definition is not shown in this excerpt
};
// Out-of-class definition of the non-virtual inc_b(): increments m_b in place.
void foo::inc_b(void) { m_b++; }
class bar: public foo {
public:
virtual void inc_v(void); // overrides foo::inc_v even for users that access it through a pointer to class foo
};
// Calls inc_v() through a base-class pointer. Because inc_v() is virtual, the
// function that actually runs is looked up through the pointed-to object's
// vtable pointer rather than being fixed by the static type foo*.
void caller_foo(foo *p){
p->inc_v(); // virtual dispatch: runs bar::inc_v() when p points to a bar
}
// Demo driver: allocates a foo, then a bar, and makes a virtual call through a foo*.
int main(){
foo* ptr = new foo(); // first allocation; the pointer is overwritten on the next line and never freed
ptr = new bar(); // ptr now refers to a bar object through the base-class type
caller_foo(ptr); // makes the virtual call p->inc_v() on the bar object
std::cout << (long)ptr << std::endl; // prints the object's address as an integer
}
Let's take a look at the vtable in gdb:
(gdb) print ptr
$1 = (foo *) 0x55555556aeb0
(gdb) print *ptr
$2 = {_vptr.foo = 0x555555557d40 <vtable for bar+16>, m_a = 1, m_b = 0}
(gdb) x/300xb 0x555555557d30
0x555555557d30 <vtable for bar>: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x555555557d38 <vtable for bar+8>: 0x60 0x7d 0x55 0x55 0x55 0x55 0x00 0x00
0x555555557d40 <vtable for bar+16>: 0x0a 0x52 0x55 0x55 0x55 0x55 0x00 0x00
0x555555557d48 <vtable for foo>: 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
0x555555557d50 <vtable for foo+8>: 0x78 0x7d 0x55 0x55 0x55 0x55 0x00 0x00
0x555555557d58 <vtable for foo+16>: 0xea 0x51 0x55 0x55 0x55 0x55 0x00 0x00
0x555555557d60 <typeinfo for bar>: 0x98 0xbc 0xfa 0xf7 0xff 0x7f 0x00 0x00
0x555555557d68 <typeinfo for bar+8>: 0x0e 0x60 0x55 0x55 0x55 0x55 0x00 0x00
0x555555557d70 <typeinfo for bar+16>: 0x78 0x7d 0x55 0x55 0x55 0x55 0x00 0x00
0x555555557d78 <typeinfo for foo>: 0x08 0xb0 0xfa 0xf7 0xff 0x7f 0x00 0x00
0x555555557d80 <typeinfo for foo+8>: 0x13 0x60 0x55 0x55 0x55 0x55 0x00 0x00
(gdb) info symbol 0x55555555520a
bar::inc_v() in section .text of /home/a.out
for godbolt: https://www.godbolt.org/z/Mse5v7W9h
typeinfo name for foo:
.string "3foo"
typeinfo for foo:
.quad vtable for __cxxabiv1::__class_type_info+16
.quad typeinfo name for foo
typeinfo name for bar:
.string "3bar"
typeinfo for bar:
.quad vtable for __cxxabiv1::__si_class_type_info+16
.quad typeinfo name for bar
.quad typeinfo for foo
vtable for foo:
.quad 0
.quad typeinfo for foo
.quad foo::inc_v()
vtable for bar:
.quad 0
.quad typeinfo for bar
.quad bar::inc_v()
The compiler does all the VTABLE setup, VPTR initialization, and inserting the appropriate code to connect VPTR to VTABLE implicitly, without giving any hint to the programmer. The programmer can just be happy that it all works and need not bother.
Let's take a look at where the late binding happens at -O0 (no optimization): https://www.godbolt.org/z/7ovMnTf7E
foo* ptr = new foo();
translate to
foo::foo() [base object constructor]:
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-8], rdi // %rdi holds the this pointer; spill it to the stack slot at [rbp-8]
mov edx, OFFSET FLAT:vtable for foo+16 // vtable address to %edx
mov rax, QWORD PTR [rbp-8] // address of this ptr to %rax
mov QWORD PTR [rax], rdx // store %rdx into [rax], i.e. the first 8 bytes of *this (the VPTR slot)
// **VPTR initialization** is done here; this is what makes **late binding** possible
nop
pop rbp
ret
...
mov edi, 16
call operator new(unsigned long) // allocate 16 bytes; the pointer to the new memory is returned in %rax
mov rbx, rax // keep the object's address in %rbx
mov QWORD PTR [rbx], 0 // VPTR of foo
mov DWORD PTR [rbx+8], 0 // int m_a of foo
mov DWORD PTR [rbx+12], 0 // int m_b of foo
mov rdi, rbx // address of allocated memory to %rdi, where is value of this ptr.
call foo::foo() [complete object constructor]
mov QWORD PTR [rbp-24], rbx
To confirm that the vtable address written into the VPTR is chosen at compile time (one fixed address per class), let's compare it to
ptr = new bar();
which translates to
bar::bar() [base object constructor]:
push rbp
mov rbp, rsp
sub rsp, 16
mov QWORD PTR [rbp-8], rdi
mov rax, QWORD PTR [rbp-8]
mov rdi, rax
call foo::foo() [base object constructor]
mov edx, OFFSET FLAT:vtable for bar+16 // let compare here!
mov rax, QWORD PTR [rbp-8]
mov QWORD PTR [rax], rdx
nop
leave
ret
...
mov edi, 16
call operator new(unsigned long)
mov rbx, rax
mov QWORD PTR [rbx], 0
mov DWORD PTR [rbx+8], 0
mov DWORD PTR [rbx+12], 0
mov rdi, rbx
call bar::bar() [complete object constructor]
mov QWORD PTR [rbp-24], rbx
from the comment above, we will know where the VPTR is initialized;
then let's move on to how it gets called at the compiler level:
void caller_foo(foo *p){
p->inc_v();
}
...
caller_foo(ptr);
translate to
caller_foo(foo*):
push rbp
mov rbp, rsp
sub rsp, 16
mov QWORD PTR [rbp-8], rdi
mov rax, QWORD PTR [rbp-8] // load val(this) to %rax
mov rax, QWORD PTR [rax] // load VPTR val, addr(bar vtable)
mov rdx, QWORD PTR [rax] // load bar vtable val, addr(bar::inc_v())
mov rax, QWORD PTR [rbp-8]
mov rdi, rax // pass this in %rdi, the implicit first argument
call rdx // indirect call through the vtable entry: bar::inc_v()
nop
leave
ret
...
mov rax, QWORD PTR [rbp-24]
mov rdi, rax // copy the this ptr to %rdi
call caller_foo(foo*)
At run time, the correct function to call is found through the vptr/vtable indirection that the compiler emitted. The late binding works!