PPTX

advertisement
Machine-Level Programming IV:
x86-64 Procedures, Data
Andrew Case
Slides adapted from Jinyang Li, Randy Bryant & Dave O’Hallaron
1
Today


Procedures (x86-64)
Arrays
 One-dimensional
 Multi-dimensional (nested)
 Multi-level

Structures
 Allocation
 Access
2
x86-64 Registers

Arguments passed to functions via registers
 If more than 6 integral parameters, then pass rest on stack
 These registers can be used as caller-saved as well

All references to stack frame via stack pointer
 Eliminates need to update %ebp/%rbp
3
x86-64 Integer Registers:
Usage Conventions
%rax
Return value
%r8
Argument #5
%rbx
Callee saved
%r9
Argument #6
%rcx
Argument #4
%r10
Caller saved
%rdx
Argument #3
%r11
Caller Saved
%rsi
Argument #2
%r12
Callee saved
%rdi
Argument #1
%r13
Callee saved
%rsp
Stack pointer
%r14
Callee saved
%rbp
Callee saved
%r15
Callee saved
4
x86-64 Long Swap
void swap_l(long *xp, long *yp)
{
long t0 = *xp;
long t1 = *yp;
*xp = t1;
*yp = t0;
}

swap:
movq
movq
movq
movq
ret
(%rdi), %rdx
(%rsi), %rax
%rax, (%rdi)
%rdx, (%rsi)
Operands passed in registers
 First (xp) in %rdi, second (yp) in %rsi
 64-bit pointers

No stack operations required (except ret)
 Can hold all local information in registers
5
x86-64 Leaf call without Stack Frame
/* Swap, using local array */
void swap_a(long *xp, long *yp)
{
volatile long loc[2];
loc[0] = *xp;
loc[1] = *yp;
*xp = loc[1];
*yp = loc[0];
}

swap_a:
movq
movq
movq
movq
movq
movq
movq
movq
ret
Avoiding Stack Pointer Change
 128 bytes after the %rsp is scratch
space for functions (red zone).
 Only good for leaf-functions
(%rdi), %rax
%rax, -24(%rsp)
(%rsi), %rax
%rax, -16(%rsp)
-16(%rsp), %rax
%rax, (%rdi)
-24(%rsp), %rax
%rax, (%rsi)
rtn Ptr
−8
%rsp
unused
−16 loc[1]
−24 loc[0]
128 bytes
…
6
x86-64 NonLeaf call without Stack Frame
/* Swap a[i] & a[i+1] */
void swap_ele(long a[], int i)
{
swap(&a[i], &a[i+1]);
}
swap_ele:
movslq
leaq
leaq
movq
call
ret

%esi,%rsi
8(%rdi,%rsi,8), %rax
(%rdi,%rsi,8), %rdi
%rax, %rsi
swap
Data stored in registers instead of
stack
#
#
#
#
Sign extend i
&a[i+1]
&a[i] (1st arg)
(2nd arg)
7
x86-64 Stack Frame Example
Swap_and_sum:
movq
%rbx, -16(%rsp)
long sum = 0;
movq
%rbp, -8(%rsp)
/* Swap a[i] & a[i+1] */
subq
$16, %rsp
void swap_and_sum(long a[], int i)
movslq %esi, %rax
{
leaq
8(%rdi,%rax,8),
swap(&a[i], &a[i+1]);
%rbx
sum += (a[i]*a[i+1]);
leaq
(%rdi,%rax,8), %rbp
}
movq
%rbx, %rsi
movq
%rbp, %rdi
call
swap
 Keeps values of &a[i] and
movq
(%rbx), %rax
&a[i+1] in callee save
imulq
(%rbp), %rax
registers
addq
%rax, sum(%rip)
movq
(%rsp), %rbx
movq
8(%rsp), %rbp
 Must set up stack frame to
addq
$16, %rsp
save these registers
ret
8
Understanding x86-64 Stack Frame
Swap_and_sum:
movq
%rbx, -16(%rsp)
movq
%rbp, -8(%rsp)
subq
$16, %rsp
movslq %esi,%rax
leaq
8(%rdi,%rax,8), %rbx
leaq
(%rdi,%rax,8), %rbp
movq
%rbx, %rsi
movq
%rbp, %rdi
call
swap
movq
(%rbx), %rax
imulq
(%rbp), %rax
addq
%rax, sum(%rip)
movq
(%rsp), %rbx
movq
8(%rsp), %rbp
addq
$16, %rsp
ret
#
#
#
#
#
#
#
#
Save %rbx
Save %rbp
Allocate stack frame
Extend i
&a[i+1] (callee save)
&a[i]
(callee save)
2nd argument
1st argument
#
#
#
#
#
#
Get a[i+1]
Multiply by a[i]
Add to sum
Restore %rbx
Restore %rbp
Deallocate frame
9
Understanding x86-64 Stack Frame
movq
movq
%rbx, -16(%rsp)
%rbp, -8(%rsp)
# Save %rbx
%rsp
# Save %rbp
subq
$16, %rsp
# Allocate stack frame
rtn addr
−8 %rbp
−16 %rbx
  
%rsp
movq
movq
(%rsp), %rbx
8(%rsp), %rbp
# Restore %rbx
# Restore %rbp
addq
$16, %rsp
# Deallocate frame
rtn addr
+8 %rbp
%rbx
10
Interesting Features of Stack Frame

Allocate entire frame at once
 All stack accesses can be relative to %rsp
 Do by decrementing stack pointer
 Can delay allocation, since safe to temporarily use red zone

Simple deallocation
 Increment stack pointer
 No base/frame pointer needed
11
x86-64 Procedure Summary

Heavy use of registers
 Parameter passing
 More temporaries using registers

Minimal use of stack




Sometimes none
Allocate/deallocate entire frame at once
All stack accesses can be relative to %rsp
Usually no base/frame pointer needed
12
Today


Procedures (x86-64)
Arrays
 One-dimensional
 Multi-dimensional (nested)
 Multi-level

Structures
13
Basic Data Types

Integral
 Stored & operated on in general (integer) registers
 Signed vs. unsigned depends on instructions used
Intel
byte
word
double word
quad word

ASM
b
w
l
q
Bytes
1
2
4
8
C
[unsigned]
[unsigned]
[unsigned]
[unsigned]
char
short
int
long int (x86-64)
Floating Point
 Stored & operated on in floating point registers
Intel
Single
Double
Extended
ASM
s
l
t
Bytes
4
8
10/12/16
C
float
double
long double
14
Array Allocation

Basic Principle
T A[L];
 Array of data type T and length L
 Contiguously allocated region of L * sizeof(T) bytes
char string[12];
x
x + 12
int val[5];
x
x+4
x+8
x + 12
x + 16
x + 20
double a[3];
x
x+8
char *p[3];
x + 16
x + 24
IA32
x
x+4
x+8
x + 12
x86-64
x
x+8
x + 16
x + 24
15
Array Access

Basic Principle
Type A[Length];
 Identifier A can be used as a pointer to array element 0: Type T*
 x is the address of A
int val[5];
1
x =A

Reference
val[4]
val
val+1
&val[2]
val[5]
*(val+1)
val + i
5
x+4
2
x+ 8
Type
Value
int
int
int
int
int
int
int
3
x
x+4
x+8
??
5
x+4i
*
*
*
*
1
x + 12
3
x + 16
x + 20
16
Array Example
#define ZLEN 5
typedef int zip[ZLEN];
zip nyu = { 1, 0, 0, 0, 3 };
zip mit = { 0, 2, 1, 3, 9 };
zip nyu;
1
16
zip mit;

20
0
36

0
0
24
2
40
0
28
1
44
3
32
3
48
36
9
52
56
Declaration “zip nyu” equivalent to “int nyu[5]”
Example arrays were allocated in successive 20 byte blocks
 Not guaranteed to happen in general
17
Array Accessing Example
zip nyu;
1
16
0
20
0
24
0
3
28
int get_digit(zip z, int digit)
{
return z[digit];
}
32



IA32
# %edx = z
# %eax = digit
movl (%edx,%eax,4),%eax

# z[digit]
36
Store starting address of
array in %edx (base)
Store array index in %eax
(index)
Size of data type in
multiplier
Use memory reference
(%edx,%eax,4):
results in:
%edx + 4*%eax
(base pointer + index*size)
18
Array Loop Example (IA32)
void zip_inc(zip z) {
int i;
for (i = 0; i < ZIP_LEN; i++)
z[i]++;
}
# edx
movl
.L4:
addl
addl
cmpl
jne
= z
$0, %eax
#
%eax = i
# loop:
$1, (%edx,%eax,4) #
z[i]++
$1, %eax
#
i++
$5, %eax
#
i:5 (ZIP_LEN)
.L4
#
if !=, goto loop
19
2D Array: Example
int zips[4][5] =
{{1, 0, 0, 0, 3},
{1, 0, 0, 1, 0 },
{1, 0, 0, 1, 1 },
{1, 0, 0, 0, 5 }};
1 0 0 0 3 1 0 0 1 0 1 0 0 1 1 1 0 0 0 5
76

96
116
136
156
2D array “Row-Major” ordering of all elements
int A[R][C];
 A[i][j] == *(A+i*C+j)
20
2D Array Element Access Code
int get_zip_digit(int index, int digit)
{
return zips[index][digit];
}
movl
leal
addl
movl

8(%ebp), %eax
(%eax,%eax,4), %eax
12(%ebp), %eax
zips(,%eax,4), %eax
#
#
#
#
index
5*index
5*index+digit
offset 4*(5*index+digit)
Array Elements
 zips[index][digit] is int
 Address: zips + 20*index + 4*digit
21
Array of pointers: Example

zip nyu = { 1, 0, 0, 0, 3 };
zip mit = { 0, 2, 1, 3, 9 };
zip cmu = { 1, 5, 2, 1, 3};

#define UCOUNT 3
int *univ[UCOUNT] = {mit, nyu, cmu};
nyu
univ
160
36
164
16
168
56
mit
1
16
0
20
0
cmu 36
0
24
2
40
60
0
28
1
44
5
1
56

Variable univ denotes
array of 3 elements
Each element is a pointer
 4 bytes
Each pointer points to array
of int’s
32
3
48
2
64
3
9
52
1
68
36
56
3
72
76
22
Element Access in Multi-Level Array
int get_univ_digit (int index, int digit)
{
return univ[index][digit];
}
movl
movl
movl
movl

8(%ebp), %eax
univ(,%eax,4), %edx
12(%ebp), %eax
(%edx,%eax,4), %eax
#
#
#
#
index
p = univ[index]
dig
p[digit]
Computation (IA32)
 Element access Mem[Mem[univ+4*index]+4*dig]
 Must do two memory reads


First get pointer to row array
Then access element within array
23
Array Element Accesses
Nested array
int get_pgh_digit
(int index, int dig)
{
return pgh[index][dig];
}
Multi-level array
int get_univ_digit
(int index, int dig)
{
return univ[index][dig];
}
Accesses looks similar in C, but addresses very different:
Mem[pgh+20*index+4*dig]
Mem[Mem[univ+4*index]+4*dig]
24
Today


Procedures (x86-64)
Arrays
 One-dimensional
 Multi-dimensional (nested)
 Multi-level

Structures
 Allocation
 Access
25
Structure Allocation
struct rec {
int a[3];
int i;
struct rec *n;
};

Memory Layout
a
0
i
n
12 16 20
Concept




Contiguously-allocated region of memory
Offset of each struct member determined at compile time
Refer to members within structure by names
Members may be of different types
26
Structure Access
struct rec {
int a[3];
int i;
struct rec *n;
};

r
a
0
r+12
i
n
12 16 20
Accessing Structure Member
 Pointer indicates first byte of structure
 Access elements with offsets
void set_i(struct rec *r, int val)
{
r->i = val;
}
IA32 Assembly
# %edx = val
# %eax = r
movl %edx, 12(%eax) # Mem[r+12] = val
27
Following Linked List
void set_all_a_i_values
(struct rec *r, int val)
{
while (r) {
int i = r->i;
r->a[i] = val;
r = r->n;
}
}
.L17:
movl
movl
movl
testl
jne
struct rec {
int a[3];
int i;
struct rec *n;
};
a
0
i
n
12 16 20
Element i
Register
Value
%edx
r
%ecx
val
#
12(%edx), %eax
#
%ecx, (%edx,%eax,4) #
16(%edx), %edx
#
%edx, %edx
#
.L17
#
loop:
r->i
r->a[i] = val
r = r->n
Test r
If != 0 goto loop
28
Summary

Procedures in x86-64
 Stack frame is relative to stack pointer
 Parameters passed in registers

Arrays
 One-dimensional
 Multi-dimensional (nested)
 Multi-level

Structures
 Allocation
 Access
29
Download