Uploaded by Jeong ho Lee

04 CPU instruction execution engine

advertisement
CPU instruction execution
1
Seehwan Yoo
Dankook University
Mobile OS Laboratory
Stored program computer
• Remember IAS?
• Real implementation of
von Neumann computer
2
Arithmetic-logic unit (ALU)
AC
MQ
Input-output
equipment
Arithmetic-logic
circuits
• Put all your program into main
memory
MBR
• Execute the instruction one by one
• From where the PC points at
• Consists of
•
•
•
•
ALU
Program Control Unit
Main memory
I/O devices
Instructions
and data
IBR
PC
IR
Control
circuits
MAR
Control
signals
Program control unit
Mobile OS Laboratory
Main
memory
M
Addresses
Register
• We will refer to a register as a ‘CPU register’
• Temporary storage inside CPU
• Store some value for some time
• Can be changed as program executes
• Load/store
• To fetch data from memory (to CPU register)
• To save data to memory (from CPU register)
Mobile OS Laboratory
3
Machine Primitives
• Arithmetic operations, moving values between regs
• Add/subtract/multiply/divide
• Memory operations
• Load/store
• Comparison of values
• Cmp/set less than
• Jump
• Jump
• Conditional jump
• Branch if equal / branch if not equal
• Function call
• Call / jump and link
Mobile OS Laboratory
4
Compiler – from source code
into machine executable binary
• Three different ways of getting sum from 1 to 10
• Add one by one
• Using iteration for-loop
• Using a short-cut equation
• Regardless of the language,
• The logic is reduced to machine instructions
Mobile OS Laboratory
5
Execution of Addition Program
• Assume that registers
(R0, R1, R2, …)
• R0: holds count
• R1: holds sum
• Instructions for v1
move r0, 0
move r1, 0
add r0, r0, 1
add r1, r1, r0
add r0, r0, 1
add r1, r1, r0
…
Mobile OS Laboratory
--- v2 ---
move r0, 0
move r1, 0
REPT:
add r0, r0, 1
add r1, r1, r0
compare r0, 10
jump_less_than REPT
6
--- v3 ---
move r0, 1
move r1, 10
move r2, 2
add r0, r0, r1
multiply r0, r0, r1
divide r0, r0, r2
Real binary operations
• Reading format
addr: instruction mnemonics
• Different instructions have
different lengths
• Intel x86_64
Mobile OS Laboratory
000000000040052d <main>:
40052d: 55                      push   %rbp
40052e: 48 89 e5                mov    %rsp,%rbp
400531: 48 83 ec 10             sub    $0x10,%rsp
400535: c7 45 f8 00 00 00 00    movl   $0x0,-0x8(%rbp)
40053c: c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%rbp)
400543: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
400547: 8b 45 f8                mov    -0x8(%rbp),%eax
40054a: 01 45 fc                add    %eax,-0x4(%rbp)
40054d: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
400551: 8b 45 f8                mov    -0x8(%rbp),%eax
400554: 01 45 fc                add    %eax,-0x4(%rbp)
400557: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
40055b: 8b 45 f8                mov    -0x8(%rbp),%eax
40055e: 01 45 fc                add    %eax,-0x4(%rbp)
400561: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
400565: 8b 45 f8                mov    -0x8(%rbp),%eax
400568: 01 45 fc                add    %eax,-0x4(%rbp)
40056b: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
40056f: 8b 45 f8                mov    -0x8(%rbp),%eax
400572: 01 45 fc                add    %eax,-0x4(%rbp)
400575: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
400579: 8b 45 f8                mov    -0x8(%rbp),%eax
40057c: 01 45 fc                add    %eax,-0x4(%rbp)
40057f: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
400583: 8b 45 f8                mov    -0x8(%rbp),%eax
400586: 01 45 fc                add    %eax,-0x4(%rbp)
400589: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
40058d: 8b 45 f8                mov    -0x8(%rbp),%eax
400590: 01 45 fc                add    %eax,-0x4(%rbp)
400593: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
400597: 8b 45 f8                mov    -0x8(%rbp),%eax
40059a: 01 45 fc                add    %eax,-0x4(%rbp)
40059d: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
4005a1: 8b 45 f8                mov    -0x8(%rbp),%eax
4005a4: 01 45 fc                add    %eax,-0x4(%rbp)
4005a7: 8b 45 fc                mov    -0x4(%rbp),%eax
4005aa: 89 c6                   mov    %eax,%esi
4005ac: bf c4 06 40 00          mov    $0x4006c4,%edi
4005b1: b8 00 00 00 00          mov    $0x0,%eax
4005b6: e8 55 fe ff ff          callq  400410 <printf@plt>
7
4005bb: c7 45 fc 00 00 00 00    movl   $0x0,-0x4(%rbp)
4005c2: c7 45 f8 00 00 00 00    movl   $0x0,-0x8(%rbp)
4005c9: eb 0a                   jmp    4005d5 <main+0xa8>
4005cb: 8b 45 f8                mov    -0x8(%rbp),%eax
4005ce: 01 45 fc                add    %eax,-0x4(%rbp)
4005d1: 83 45 f8 01             addl   $0x1,-0x8(%rbp)
4005d5: 83 7d f8 0a             cmpl   $0xa,-0x8(%rbp)
4005d9: 7e f0                   jle    4005cb <main+0x9e>
4005db: 8b 45 fc                mov    -0x4(%rbp),%eax
4005de: 89 c6                   mov    %eax,%esi
4005e0: bf c4 06 40 00          mov    $0x4006c4,%edi
4005e5: b8 00 00 00 00          mov    $0x0,%eax
4005ea: e8 21 fe ff ff          callq  400410 <printf@plt>
4005ef: c7 45 fc 01 00 00 00    movl   $0x1,-0x4(%rbp)
4005f6: c7 45 f8 0a 00 00 00    movl   $0xa,-0x8(%rbp)
4005fd: 8b 45 f8                mov    -0x8(%rbp),%eax
400600: 01 45 fc                add    %eax,-0x4(%rbp)
400603: 8b 45 fc                mov    -0x4(%rbp),%eax
400606: 0f af 45 f8             imul   -0x8(%rbp),%eax
40060a: 89 45 fc                mov    %eax,-0x4(%rbp)
40060d: 8b 45 fc                mov    -0x4(%rbp),%eax
400610: 89 c2                   mov    %eax,%edx
400612: c1 ea 1f                shr    $0x1f,%edx
400615: 01 d0                   add    %edx,%eax
400617: d1 f8                   sar    %eax
400619: 89 45 fc                mov    %eax,-0x4(%rbp)
40061c: 8b 45 fc                mov    -0x4(%rbp),%eax
40061f: 89 c6                   mov    %eax,%esi
400621: bf c4 06 40 00          mov    $0x4006c4,%edi
400626: b8 00 00 00 00          mov    $0x0,%eax
40062b: e8 e0 fd ff ff          callq  400410 <printf@plt>
400630: b8 00 00 00 00          mov    $0x0,%eax
400635: c9                      leaveq
400636: c3                      retq
400637: 66 0f 1f 84 00 00 00    nopw   0x0(%rax,%rax,1)
40063e: 00 00
Intel binary interpret
• Done by single operation
• Sub for subtraction
• Intel instructions are slightly complex
• addl
• $0x1, -0x8(%regB)
• M[regB - 0x8] = M[regB - 0x8] + 0x1
• mov
• -0x8(%regA), %regB
• regB = M[regA - 0x8]
• add
• %regA, -0x4(%regB)
• M[regB - 0x4] = M[regB - 0x4] + regA
Mobile OS Laboratory
8
000000000040052d <main>:
40052d: 55
push %rbp
40052e: 48 89 e5
mov %rsp,%rbp
400531: 48 83 ec 10
sub $0x10,%rsp
400535: c7 45 f8 00 00 00 00
movl $0x0,-0x8(%rbp)
40053c: c7 45 fc 00 00 00 00
movl $0x0,-0x4(%rbp)
400543: 83 45 f8 01
addl $0x1,-0x8(%rbp)
400547: 8b 45 f8
mov -0x8(%rbp),%eax
40054a: 01 45 fc
add %eax,-0x4(%rbp)
40054d: 83 45 f8 01
addl $0x1,-0x8(%rbp)
400551: 8b 45 f8
mov -0x8(%rbp),%eax
400554: 01 45 fc
add %eax,-0x4(%rbp)
400557: 83 45 f8 01
addl $0x1,-0x8(%rbp)
40055b: 8b 45 f8
mov -0x8(%rbp),%eax
rsp, rbp, eax: Intel registers
40055e: 01 45 fc
add %eax,-0x4(%rbp)
400561: 83 45 f8 01
addl $0x1,-0x8(%rbp)
400565: 8b 45 f8
mov -0x8(%rbp),%eax
400568: 01 45 fc
add %eax,-0x4(%rbp)
40056b: 83 45 f8 01
addl $0x1,-0x8(%rbp)
40056f: 8b 45 f8
mov -0x8(%rbp),%eax
400572: 01 45 fc
add %eax,-0x4(%rbp)
400575: 83 45 f8 01
addl $0x1,-0x8(%rbp)
• addl : increase (and update) count
• mov, add : increase (and update) sum
MIPS binary
•
•
•
•
9
Fixed size
Variable is in memory
Addition with registers
Load word
• LW regA, num (regB)
• regA = M[regB + num]
• Store word
• SW regA, num (regB)
• M[regB + num] = regA
• Add immediate unsigned (addiu)
• addiu regA, regB, num
• regA = regB + num
• Add unsigned (addu)
• addu regA, regB, regC
• regA = regB + regC
Mobile OS Laboratory
v0, v1, s8, sp: MIPS registers
• lw, addiu, sw : increase and update count
• lw, lw, addu, sw : increase and update sum
We will use MIPS!
• Because it is simple
• Use 32-bit registers
•
•
•
•
Variables are in memory
Load values from mem to regs
normal operations use registers
CPU operations are simple, using regs
• Literals, regs
• Small integer values within instructions
• Addiu, LW, SW
• Registers have nicknames
• You don’t have to remember,
but you will remember some of them
• Compilers automatically generate code
Mobile OS Laboratory
10
• MIPS registers usage, nickname
•
•
•
•
•
•
•
•
•
•
•
•
•
R0~R31
R0: zero register, always 0
R1: assembly temporary
R2, R3: v0,v1 (return value)
R4~R7: a0~a3 (function arguments)
R8~R15: t0~t7 (temporary registers)
R16~R23: s0~s7 (saved temporary)
R24, R25: t8, t9 (temporary reg)
R26, R27: k0, k1 (OS kernel)
R28: gp (global pointer)
R29: sp (stack pointer)
R30: fp (frame pointer)
R31: ra (return address)
Memory operation (again)
• Load
•
•
•
•
•
Get value from memory address to register
LW regA, num (regB)
Memory address: num (regB)// M[regB + num]
Register: regA
regA ← M[regB + num]
• Store
•
•
•
•
•
Store value from register to memory address
SW regA, num (regB)
Register: regA
Memory address: num (regB)// M[regB + num]
M[regB + num] ← regA
Mobile OS Laboratory
11
a = a+1;
• Variable a is stored in memory
• Load value from a’s location into regA
• lw regA, 8(sp) // assume a is in M[sp+8]
• Add 1
• Update register regA
• addiu regA, regA, 1
• Store updated value to a’s location
• sw regA, 8(sp)
Mobile OS Laboratory
12
c = a+b;
• Variable a, b, c are stored in memory
• Load value from a’s location into regA, from b’s location into regB
• lw regA, 8(sp) // assume a is in M[sp+8]
• lw regB, 12(sp) // assume b is in M[sp+12]
• Add a+b, result in regA
• addu regA, regA, regB
• Store the result to c’s location
• sw regA, 16(sp) // assume c is in M[sp+16]
Mobile OS Laboratory
13
Register usages
• Compiler selects the registers to use
• MIPS: sp, fp, ra, v0, a0~a3, t0~t9, etc. (or r0~r31)
• Intel: ax, bx, cx, dx, si, di, cs, ds, fs, gs, etc.
• Types and registers
•
•
•
•
•
According to the types, the compiler generates proper offsets
int a[2]; b = a[1];
Load value from a’s location + 4 into register regA, and
Store regA into b’s location
Let’s assume a’s location &a, b’s location &b
•
•
•
•
mov t0, &a
mov t1, &b
lw t2, 4(t0)
sw t2, 0(t1)
Mobile OS Laboratory
14
Why are addu/addiu separated?
• Instruction with registers
• Use when all the values are in registers
• Instruction with immediate value
• Numbers can be directly presented within instruction
• MIPS has 32-bit instruction encoding, limited size
• Cannot encode 32 bit numbers
• Pack instruction id, 3 register numbers, etc.
Mobile OS Laboratory
15
Logical operations
• Slide from
Computer Organization
and Design
16
Logical Operations
n
• Chapter 2
n
Instructions for bitwise manipulation
Operation
C
Java
MIPS
Shift left
<<
<<
sll
Shift right
>>
>>>
srl
Bitwise AND
&
&
and, andi
Bitwise OR
|
|
or, ori
Bitwise NOT
~
~
nor
Useful for extracting and inserting
groups of bits in a word
Chapter 2 — Instructions: Language of the Computer — 24
Mobile OS Laboratory
Logical operations, more
17
AND Operations
• Useful to mask bits in a word
• Select some bits, clear others to 0
and $t0, $t1, $t2
OR Operations
• Useful to include bits in a word
•
$t2
0000 0000 0000 0000 0000 1101 1100 0000
$t1
0000 0000 0000 0000 0011 1100 0000 0000
$t0
0000 0000 0000 0000 0000 1100 0000 0000
Set some bits to 1, leave others unchanged
or $t0, $t1, $t2
$t2
0000 0000 0000 0000 0000 1101 1100 0000
$t1
0000 0000 0000 0000 0011 1100 0000 0000
$t0
0000 0000 0000 0000 0011 1101 1100 0000
Chapter 2 — Instructions: Language of the Computer — 25
NOT Operations
• Useful to invert bits in a word
• Change 0 to 1, and 1 to 0
• MIPS has NOR 3-operand instruction
• a NOR b == NOT ( a OR b )
nor $t0, $t1, $zero
Register 0: always
read as zero
Chapter 2 — Instructions: Language of the Computer — 26
$t1
0000 0000 0000 0000 0011 1100 0000 0000
$t0
1111 1111 1111 1111 1100 0011 1111 1111
Chapter 2 — Instructions: Language of the Computer — 27
Mobile OS Laboratory
Conditional Operations
• Branch to a labeled instruction
if a condition is true
• C code:
• Otherwise, continue
sequentially
• beq rs, rt, L1
• if (rs == rt) branch to
instruction labeled L1;
• bne rs, rt, L1
• if (rs != rt) branch to
instruction labeled L1;
• j L1
• unconditional jump to
instruction labeled L1
Mobile OS Laboratory
if (i==j) f = g+h;
else f = g-h;
• f, g, … in $s0, $s1, …
• Compiled MIPS code:
      bne $s3, $s4, Else
      add $s0, $s1, $s2
      j Exit
Else: sub $s0, $s1, $s2
Exit: …
18
Homework: Compiling Loop Statements
• By next week
• C code:
while (save[i] == k) i += 1;
• i in $s3, k in $s5, address of save in $s6
• Compiled MIPS code:
Loop:
…
(fill in here)
j Loop
Exit:
Mobile OS Laboratory
19
Side note: Basic block
• A basic block is a sequence of instructions with
• No embedded branches (except at end)
• No branch targets (except at beginning)
• Single entry/single exit instruction block
• A compiler identifies basic blocks for optimization
• An advanced processor can accelerate execution of basic blocks
Mobile OS Laboratory
20
Summary
• MIPS Assembly instructions
• Arithmetic operations
• And, or, sll, srl, etc.
•
•
•
•
Operation with registers
Variable in memory
Load before use, store back after use
Conditional execution
• Slt (set less than)
• Bne (branch if not equal) / beq (branch if equal)
• Basic block
Mobile OS Laboratory
21
Download