CSCI-UA.0201-003 Computer Systems Organization
Lecture 10: Machine-Level Programming IV: Data
Mohamed Zahran (aka Z) mzahran@cs.nyu.edu http://www.mzahran.com
Some slides adapted (and slightly modified) from: • Clark Barrett • Jinyang Li • Randy Bryant • Dave O’Hallaron

Today
• Procedures in x86-64
• Arrays
  – One-dimensional
  – Multi-dimensional (nested)
  – Multi-level
• Structures
  – Allocation
  – Access

x86-64 Registers
• Arguments passed to functions via registers
  – If there are more than 6 integral arguments, the rest are passed on the stack
  – These registers can be used as caller-saved as well
• All references to stack frame via stack pointer
  – Eliminates need to update %ebp/%rbp

x86-64 Integer Registers: Usage Conventions
  %rax  Return value      %r8   Argument #5
  %rbx  Callee saved      %r9   Argument #6
  %rcx  Argument #4       %r10  Caller saved
  %rdx  Argument #3       %r11  Caller saved
  %rsi  Argument #2       %r12  Callee saved
  %rdi  Argument #1       %r13  Callee saved
  %rsp  Stack pointer     %r14  Callee saved
  %rbp  Callee saved      %r15  Callee saved

x86-64 Long Swap
void swap_l(long *xp, long *yp) { long t0 = *xp; long t1 = *yp; *xp = t1; *yp = t0; }

swap:
    movq (%rdi), %rdx
    movq (%rsi), %rax
    movq %rax, (%rdi)
    movq %rdx, (%rsi)
    ret

• Operands passed in registers
  – First (xp) in %rdi, second (yp) in %rsi
  – 64-bit pointers
• No stack operations required (except ret)
  – Can hold all local information in registers

About X86-64
• Many functions do not require a stack frame. Only functions that cannot keep all local variables in registers need to allocate space on the stack.
• There is no frame pointer. 
Instead, references to stack locations are made relative to the stack pointer

x86-64 Locals
/* Swap, using local array */
void swap_a(long *xp, long *yp) { volatile long loc[2]; loc[0] = *xp; loc[1] = *yp; *xp = loc[1]; *yp = loc[0]; }

swap_a:
    movq (%rdi), %rax
    movq %rax, -24(%rsp)
    movq (%rsi), %rax
    movq %rax, -16(%rsp)
    movq -16(%rsp), %rax
    movq %rax, (%rdi)
    movq -24(%rsp), %rax
    movq %rax, (%rsi)
    ret

• Avoiding Stack Pointer Change
  – Can hold all information within small window beyond stack pointer

Stack layout (offsets relative to %rsp):
  %rsp →  rtn Ptr
   −8     unused
  −16     loc[1]
  −24     loc[0]    (low address here)

x86-64 Non-Leaf without Stack Frame (non-leaf: calls other functions)
/* Swap a[i] & a[i+1] */
void swap_ele(long a[], int i) { swap(&a[i], &a[i+1]); }

• What’s on swap_ele’s stack frame? (a is passed in %rdi, i in %esi)

swap_ele:
    movslq %esi,%rsi              # Sign extend i
    leaq   8(%rdi,%rsi,8), %rax   # &a[i+1]
    leaq   (%rdi,%rsi,8), %rdi    # &a[i] (1st arg)
    movq   %rax, %rsi             # (2nd arg)
    call   swap
    ret

x86-64 Stack Frame Example
long sum = 0;
void swap_ele_su (long a[], int i) { swap(&a[i], &a[i+1]); sum += (a[i]*a[i+1]); }

swap_ele_su:
    movq   %rbx, -16(%rsp)        # Save %rbx
    movq   %rbp, -8(%rsp)         # Save %rbp
    subq   $16, %rsp              # Allocate stack frame
    movslq %esi,%rax              # Extend i
    leaq   8(%rdi,%rax,8), %rbx   # &a[i+1] (callee save)
    leaq   (%rdi,%rax,8), %rbp    # &a[i] (callee save)
    movq   %rbx, %rsi             # 2nd argument
    movq   %rbp, %rdi             # 1st argument
    call   swap
    movq   (%rbx), %rax           # Get a[i+1]
    imulq  (%rbp), %rax           # Multiply by a[i]
    addq   %rax, sum(%rip)        # Add to sum
    movq   (%rsp), %rbx           # Restore %rbx
    movq   8(%rsp), %rbp          # Restore %rbp
    addq   $16, %rsp              # Deallocate frame
    ret

Understanding x86-64 Stack Frame
    movq %rbx, -16(%rsp)          # Save %rbx
    movq %rbp, -8(%rsp)           # Save %rbp
    subq $16, %rsp                # Allocate stack frame

Stack before subq (offsets relative to %rsp):
  %rsp →  rtn addr
   −8     %rbp
  −16     %rbx

    movq (%rsp), %rbx             # Restore %rbx
    movq 8(%rsp), %rbp            # Restore %rbp
    addq $16, %rsp                # Deallocate frame

Stack after subq (offsets relative to %rsp):
          rtn addr
   +8     %rbp
  %rsp →  %rbx

x86-64 Procedure Summary
• Heavy use of registers
  – Parameter passing
  – More temporaries using registers
• Minimal use of stack
  – Sometimes none 
except for return addresses
  – Allocate/de-allocate entire frame at once
  – All stack accesses can be relative to %rsp
  – No base/frame pointer needed

Today
• Procedures in x86-64
• Arrays
  – One-dimensional
  – Multi-dimensional (nested)
  – Multi-level
• Structures
  – Allocation
  – Access

Basic Data Types
• Integral
  – Stored & operated on in general (integer) registers
  – Signed vs. unsigned depends on instructions used

  Intel         ASM   Bytes   C
  byte          b     1       [unsigned] char
  word          w     2       [unsigned] short
  double word   l     4       [unsigned] int
  quad word     q     8       [unsigned] long int (x86-64)

• Floating Point
  – Stored & operated on in floating point registers

  Intel      ASM   Bytes      C
  Single     s     4          float
  Double     l     8          double
  Extended   t     10/12/16   long double

Array Allocation
• Basic Principle: T A[L];
  – Array of data type T and length L
  – Contiguously allocated region of L * sizeof(T) bytes

  char string[12];   spans x to x + 12
  int val[5];        elements at x, x+4, x+8, x + 12, x + 16; ends at x + 20
  double a[3];       elements at x, x+8, x + 16; ends at x + 24
  char *p[3];        IA32:   elements at x, x+4, x+8; ends at x + 12
                     x86-64: elements at x, x+8, x + 16; ends at x + 24

Array Access
• Basic Principle: T A[L];

  int val[5];   (start address x = A)
  value:     1    5     2     1        3
  address:   x    x+4   x+8   x + 12   x + 16   (ends at x + 20)

• Reference   Type    Value
  val[4]      int     3
  val         int *   x
  val+1       int *   x+4
  &val[2]     int *   x+8
  val[5]      int     ??
  *(val+1)    int     5
  val + i     int *   x+4i

Array Example
#define ZLEN 5
typedef int zip_dig[ZLEN];
zip_dig nyu = { 1, 0, 0, 0, 3 };
zip_dig mit = { 0, 2, 1, 3, 9 };

  zip_dig nyu;   value:    1    0    0    0    3
                 address:  16   20   24   28   32   (ends at 36)
  zip_dig mit;   value:    0    2    1    3    9
                 address:  36   40   44   48   52   (ends at 56)

• Declaration “zip_dig nyu” equivalent to “int nyu[5]”
• Example: the 2 arrays are allocated in successive 20 byte blocks
  – Not guaranteed to happen in general

Array Accessing Example
  zip_dig nyu;   value:    1    0    0    0    3
                 address:  16   20   24   28   32   (ends at 36)

int get_digit (zip_dig z, int dig) { return z[dig]; }

IA32
    # %edx = z
    # %eax = dig
    movl (%edx,%eax,4),%eax       # z[dig]

• Register %edx contains starting address of array
• Register %eax contains array index
• Desired digit at 4*%eax + %edx
• Use memory reference (%edx,%eax,4)

Array Loop Example (IA32)
void zincr(zip_dig z) { int i; for (i = 0; i < ZLEN; i++) z[i]++; }

(i is in %eax; the base address of z is in %edx; an int is 4 bytes)

    # edx = z
    movl $0, %eax                 # %eax = i
.L4:                              # loop:
    addl $1, (%edx,%eax,4)        # z[i]++
    addl $1, %eax                 # i++
    cmpl $5, %eax                 # i:5
    jne  .L4                      # if !=, goto loop

2D Array: Example
int pgh[4][5] = {{1, 5, 2, 0, 6}, {1, 5, 2, 1, 3}, {1, 5, 2, 1, 7}, {1, 5, 2, 2, 1}};

  value:          1 5 2 0 6   1 5 2 1 3   1 5 2 1 7   1 5 2 2 1
  row starts at:  76          96          116         136         (ends at 156)

• 2D array: “Row-Major” ordering of all elements
  int A[R][C];
  – A[i][j] == *((int *)A + i*C + j)

2D Array Element Access Code
int get_pgh_digit(int index, int dig) { return pgh[index][dig]; }

pgh[4][5]
    movl 8(%ebp), %eax            # index
    leal (%eax,%eax,4), %eax      # 5*index
    addl 12(%ebp), %eax           # 5*index+dig
    movl pgh(,%eax,4), %eax       # offset 4*(5*index+dig)

• Array Elements
  – pgh[index][dig] is int
  – Address: pgh + 20*index + 4*dig

Array of pointers: Example
zip_dig nyu = { 1, 0, 0, 0, 3 };
zip_dig mit = { 0, 2, 1, 3, 9 };
zip_dig cmu = { 1, 5, 2, 1, 3};
#define UCOUNT 3
int *univ[UCOUNT] = {mit, nyu, cmu};

  univ   value:    36    16    56
         address:  160   164   168
  nyu    value:    1    0    0    0    3
         address:  16   20   24   28   32   (ends at 36)
  mit    value:    0    2    1    3    9
         address:  36   40   44   48   52   (ends at 56)
  cmu    value:    1    5    2    1    3
         address:  56   60   64   68   72   (ends at 76)

Element Access in Array of pointers
int get_univ_digit(int index, int dig) { return univ[index][dig]; }

    movl 8(%ebp), %eax            # index
    movl univ(,%eax,4), %edx      # p = univ[index]
    movl 12(%ebp), %eax           # dig
    movl (%edx,%eax,4), %eax      # p[dig]

• Computation (IA32)
  – Element access Mem[Mem[univ+4*index]+4*dig]
  – Must do two memory reads
    • First get pointer to row array
    • Then access element within array

Today
• Procedures in x86-64
• Arrays
  – One-dimensional
  – Multi-dimensional (nested)
  – Multi-level
• Structures
  – Allocation
  – Access

Structure Allocation
struct rec { int a[3]; int i; struct rec *n; };

Memory Layout
  member:  a    i    n
  offset:  0    12   16   (ends at 20)

• Struct members laid out contiguously in memory
• Offset of each struct member determined at compile time

Structure Access
struct rec { int a[3]; int i; struct rec *n; };

  member:  a    i      n
  offset:  0    12     16   (ends at 20)
  address: r    r+12

• Accessing Structure Member
  – Pointer indicates first byte of structure
  – Access elements with offsets

void set_i(struct rec *r, int val) { r->i = val; }

IA32 Assembly
    # %edx = val, %eax = r
    movl %edx, 12(%eax)           # Mem[r+12] = val

Following Linked List
struct rec { int a[3]; int i; struct rec *n; };
void set_val (struct rec *r, int val) { while (r) { int i = r->i; r->a[i] = val; r = r->n; } }

  member:  a (holds Element i)   i    n
  offset:  0                     12   16   (ends at 20)

    # %edx = r, %ecx = val, %eax = i
.L17:                             # loop:
    movl  12(%edx), %eax          # i = r->i
    movl  %ecx, (%edx,%eax,4)     # r->a[i] = val
    movl  16(%edx), %edx          # r = r->n
    testl %edx, %edx              # Test r
    jne   .L17                    # If != 0 goto loop

Conclusions
• Procedures in x86-64
  – Stack frame is relative to stack pointer
  – Parameters passed in registers
• Arrays
  – One-dimensional
  – Multi-dimensional (nested)
  – Multi-level
• Structures
  – Allocation
  – Access