Lecture 10: Machine-Level Programming IV: Data

advertisement
CSCI-UA.0201-003
Computer Systems Organization
Lecture 10: Machine-Level
Programming IV: Data
Mohamed Zahran (aka Z)
mzahran@cs.nyu.edu
http://www.mzahran.com
Some slides adapted
(and slightly modified)
from:
• Clark Barrett
• Jinyang Li
• Randy Bryant
• Dave O’Hallaron
Today
• Procedures in x86-64
• Arrays
– One-dimensional
– Multi-dimensional (nested)
– Multi-level
• Structures
– Allocation
– Access
x86-64 Registers
• Arguments passed to functions via registers
– If there are more than 6 integral arguments, the rest are passed on the stack
– These registers can be used as caller-saved as well
• All references to stack frame via stack pointer
– Eliminates need to update %ebp/%rbp
x86-64 Integer Registers:
Usage Conventions
%rax
Return value
%r8
Argument #5
%rbx
Callee saved
%r9
Argument #6
%rcx
Argument #4
%r10
Caller saved
%rdx
Argument #3
%r11
Caller Saved
%rsi
Argument #2
%r12
Callee saved
%rdi
Argument #1
%r13
Callee saved
%rsp
Stack pointer
%r14
Callee saved
%rbp
Callee saved
%r15
Callee saved
x86-64 Long Swap
void swap_l(long *xp, long *yp)
{
long t0 = *xp;
long t1 = *yp;
*xp = t1;
*yp = t0;
}
swap:
movq (%rdi), %rdx
movq (%rsi), %rax
movq %rax, (%rdi)
movq %rdx, (%rsi)
ret
• Operands passed in registers
– First (xp) in %rdi, second (yp) in %rsi
– 64-bit pointers
• No stack operations required (except ret)
– Can hold all local information in registers
About X86-64
• Many functions do not require a stack
frame. Only functions that cannot keep
all local variables in registers need to
allocate space on the stack.
• There is no frame pointer. Instead,
references to stack locations are made
relative to the stack pointer
x86-64 Locals
/* Swap, using local array */
void swap_a(long *xp, long *yp)
{
volatile long loc[2];
loc[0] = *xp;
loc[1] = *yp;
*xp = loc[1];
*yp = loc[0];
}
swap_a:
movq (%rdi), %rax
movq %rax, -24(%rsp)
movq (%rsi), %rax
movq %rax, -16(%rsp)
movq -16(%rsp), %rax
movq %rax, (%rdi)
movq -24(%rsp), %rax
movq %rax, (%rsi)
ret
• Avoiding Stack Pointer Change
– Can hold all information within
small window beyond stack
pointer
rtn Ptr
−8
unused
−16 loc[1]
−24 loc[0]
Low address here
%rsp
x86-64 Non Leaf without Stack Frame
Call other functions
/* Swap a[i] & a[i+1] */
void swap_ele(long a[], int i)
{
swap(&a[i], &a[i+1]);
}
• What’s on swap_ele’s stack frame?
%esi
%rdi
swap_ele:
movslq %esi,%rsi # Sign extend i
leaq 8(%rdi,%rsi,8), %rax # &a[i+1]
leaq (%rdi,%rsi,8), %rdi # &a[i] (1st arg)
movq %rax, %rsi # (2nd arg)
call swap
ret
x86-64 Stack Frame Example
long sum = 0;
void swap_ele_su
(long a[], int i)
{
swap(&a[i], &a[i+1]);
sum += (a[i]*a[i+1]);
}
swap_ele_su:
movq %rbx, -16(%rsp) # Save %rbx
movq %rbp, -8(%rsp) # Save %rbp
subq $16, %rsp # Allocate stack frame
movslq %esi,%rax # Extend i
leaq 8(%rdi,%rax,8), %rbx # &a[i+1] (callee save)
leaq (%rdi,%rax,8), %rbp # &a[i] (callee save)
movq %rbx, %rsi # 2nd argument
movq %rbp, %rdi # 1st argument
call swap
movq (%rbx), %rax # Get a[i+1]
imulq (%rbp), %rax # Multiply by a[i]
addq %rax, sum(%rip) # Add to sum
movq (%rsp), %rbx # Restore %rbx
movq 8(%rsp), %rbp # Restore %rbp
addq $16, %rsp # Deallocate frame
ret
Understanding x86-64 Stack Frame
movq %rbx, -16(%rsp) # Save %rbx
movq %rbp, -8(%rsp) # Save %rbp
subq $16, %rsp # Allocate stack frame
%rsp
rtn addr
−8 %rbp
−16 %rbx
  
%rsp
movq (%rsp), %rbx # Restore %rbx
movq 8(%rsp), %rbp # Restore %rbp
addq $16, %rsp # Deallocate frame
rtn addr
+8 %rbp
%rbx
x86-64 Procedure Summary
• Heavy use of registers
– Parameter passing
– More temporaries using registers
• Minimal use of stack
– Sometimes none except for return addresses
– Allocate/de-allocate entire frame at once
– All stack accesses can be relative to %rsp
– No base/frame pointer needed
Today
• Procedures in x86-64
• Arrays
– One-dimensional
– Multi-dimensional (nested)
– Multi-level
• Structures
– Allocation
– Access
Basic Data Types
• Integral
– Stored & operated on in general (integer) registers
– Signed vs. unsigned depends on instructions used
Intel | ASM | Bytes | C
byte | b | 1 | [unsigned] char
word | w | 2 | [unsigned] short
double word | l | 4 | [unsigned] int
quad word | q | 8 | [unsigned] long int (x86-64)
• Floating Point
– Stored & operated on in floating point registers
Intel | ASM | Bytes | C
Single | s | 4 | float
Double | l | 8 | double
Extended | t | 10/12/16 | long double
Array Allocation
• Basic Principle
T A[L];
– Array of data type T and length L
– Contiguously allocated region of L * sizeof(T) bytes
char string[12];
x
x + 12
int val[5];
x
x+4
x+8
x + 12
x + 16
x + 20
double a[3];
x
x+8
x + 16
x + 24
char *p[3];
IA32
x
x+4
x+8
x + 12
x86-64
x
x+8
x + 16
x + 24
Array Access
• Basic Principle
T A[L];
int val[5];
Elements: 1 5 2 1 3
Addresses: x=A, x+4, x+8, x + 12, x + 16 (array ends at x + 20)
• Reference | Type | Value
val[4] | int | 3
val | int * | x
val+1 | int * | x+4
&val[2] | int * | x+8
val[5] | int | ??
*(val+1) | int | 5
val + i | int * | x+4i
Array Example
#define ZLEN 5
typedef int zip_dig[ZLEN];
zip_dig nyu = { 1, 0, 0, 0, 3 };
zip_dig mit = { 0, 2, 1, 3, 9 };
zip_dig nyu; values 1 0 0 0 3 at addresses 16 20 24 28 32 (ends at 36)
zip_dig mit; values 0 2 1 3 9 at addresses 36 40 44 48 52 (ends at 56)
• Declaration “zip_dig nyu” equivalent to “int nyu[5]”
• Example: the 2 arrays are allocated in successive 20-byte blocks
– Not guaranteed to happen in general
Array Accessing Example
zip_dig nyu; values 1 0 0 0 3 at addresses 16 20 24 28 32 (ends at 36)
int get_digit
(zip_dig z, int dig)
{
return z[dig];
}
IA32
# %edx = z
# %eax = dig
movl (%edx,%eax,4),%eax # z[dig]
Register %edx contains
starting address of array
Register %eax contains
array index
Desired digit at
4*%eax + %edx
Use memory reference
(%edx,%eax,4)
Array Loop Example (IA32)
void zincr(zip_dig z) {
int i;
for (i = 0; i < ZLEN; i++)
z[i]++;
}
# edx = z
movl $0, %eax # %eax = i
.L4: # loop:
addl $1, (%edx,%eax,4) # z[i]++
addl $1, %eax # i++
cmpl $5, %eax # i:5
jne .L4 # if !=, goto loop
i → %eax
base address of z → %edx
int → 4 bytes
2D Array: Example
int pgh[4][5] =
{{1, 5, 2, 0, 6},
{1, 5, 2, 1, 3},
{1, 5, 2, 1, 7},
{1, 5, 2, 2, 1}};
1 5 2 0 6 1 5 2 1 3 1 5 2 1 7 1 5 2 2 1
76
96
116
136
156
• 2D array: “Row-Major” ordering of all elements
int A[R][C];
– A[i][j] == *((int *)A + i*C + j), i.e. the element at byte address A + (i*C + j)*sizeof(int)
2D Array Element Access Code
int get_pgh_digit(int index, int dig)
{
return pgh[index][dig];
}
pgh[4][5]
movl 8(%ebp), %eax # index
leal (%eax,%eax,4), %eax # 5*index
addl 12(%ebp), %eax # 5*index+dig
movl pgh(,%eax,4), %eax # offset 4*(5*index+dig)
• Array Elements
– pgh[index][dig] is int
– Address: pgh + 20*index + 4*dig
Array of pointers: Example
zip_dig nyu = { 1, 0, 0, 0, 3 };
zip_dig mit = { 0, 2, 1, 3, 9 };
zip_dig cmu = { 1, 5, 2, 1, 3};
#define UCOUNT 3
int *univ[UCOUNT] = {mit, nyu, cmu};
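univ: address 160 holds 36 (mit), address 164 holds 16 (nyu), address 168 holds 56 (cmu)
nyu: values 1 0 0 0 3 at addresses 16 20 24 28 32 (ends at 36)
mit: values 0 2 1 3 9 at addresses 36 40 44 48 52 (ends at 56)
cmu: values 1 5 2 1 3 at addresses 56 60 64 68 72 (ends at 76)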
Element Access in Array of pointers
int get_univ_digit(int index, int dig)
{
return univ[index][dig];
}
movl 8(%ebp), %eax # index
movl univ(,%eax,4), %edx # p = univ[index]
movl 12(%ebp), %eax # dig
movl (%edx,%eax,4), %eax # p[dig]
• Computation (IA32)
– Element access Mem[Mem[univ+4*index]+4*dig]
– Must do two memory reads
• First get pointer to row array
• Then access element within array
Today
• Procedures in x86-64
• Arrays
– One-dimensional
– Multi-dimensional (nested)
– Multi-level
• Structures
– Allocation
– Access
Structure Allocation
struct rec {
int a[3];
int i;
struct rec *n;
};

Memory Layout
a at offset 0, i at offset 12, n at offset 16 (structure ends at offset 20)
Struct members laid out contiguously in memory

Offset of each struct member determined at compile time
Structure Access
r
struct rec {
int a[3];
int i;
struct rec *n;
};
a
0
r+12
i
n
12 16 20
• Accessing Structure Member
– Pointer indicates first byte of structure
– Access elements with offsets
void
set_i(struct rec *r,
int val)
{
r->i = val;
}
IA32 Assembly
# %edx = val, %eax = r
movl %edx, 12(%eax) # Mem[r+12] = val
Following Linked List
struct rec {
int a[3];
int i;
struct rec *n;
};
void set_val
(struct rec *r, int val)
{
while (r) {
int i = r->i;
r->a[i] = val;
r = r->n;
}
}
a
0
i
n
12 16 20
Element i
# %edx = r, %ecx = val, %eax = i
.L17: # loop:
movl 12(%edx), %eax # i = r->i
movl %ecx, (%edx,%eax,4) # r->a[i] = val
movl 16(%edx), %edx # r = r->n
testl %edx, %edx # Test r
jne .L17 # If != 0 goto loop
Conclusions
• Procedures in x86-64
– Stack frame is relative to stack pointer
– Parameters passed in registers
• Arrays
– One-dimensional
– Multi-dimensional (nested)
– Multi-level
• Structures
– Allocation
– Access
Download