introc

advertisement
You can create ANSI C programs on the UWO Engineering Linux
server. (ssh login to linux01.eng.uwo.ca)
[10:00pm linux01] cat > distrib.c
int p,q,r,s,t;
main() {
p=(q+r)*(s-t);
}
Programs can be cross-compiled for 8086 machines using bcc:
[10:02pm linux01] bcc -S -o distrib.s distrib.c
bcc produces a human-readable assembler output
when the -S switch is used. You may cat the file:
[10:02pm linux01] cat distrib.s
! 1
! 1 int p,q,r,s,t;
! 2 main() {
export _main
_main:
! 3 p=(q+r)*(s-t);
push
bp
mov
bp,sp
push
di
push
si
mov
ax,[_s]
sub
ax,[_t]
push
ax
mov
ax,[_q]
add
ax,[_r]
mov
cx,-6[bp]
imul
cx
inc
sp
inc
sp
mov
[_p],ax
! 4 }
pop
si
pop
di
pop
bp
ret
! 5
.data
.bss
.comm
_t,2
.comm
_q,2
.comm
_s,2
.comm
_p,2
.comm
_r,2
Here is an explanation of the .s listing:
The integer declarations cause the assembler to allocate
two-byte variables in the common block:
int p,q,r,s,t;
.data
.bss
.comm
_t,2
.comm
_q,2
.comm
_s,2
.comm
_p,2
.comm
_r,2
The main program assembles as a symbol, which begins by
saving the BP register on the stack, and resetting BP to
the current stack pointer. Then, DI and SI are pushed,
using up two 2-byte words:
export _main
_main:
main() {
push
bp
mov
bp,sp
push
di
push
si
The expression is implemented by first computing (s-t)
and pushing that result onto the stack. Subsequently, it
is retrieved from this location, -6 bytes from where the
base pointer BP points (below BP lie the saved DI at -2,
the saved SI at -4, and this temporary at -6). This
temporary 2-byte word is deallocated from the stack by
incrementing SP twice. Finally, the result of the
operations is stored in the variable (int p), which the
assembler names _p.
p=(q+r)*(s-t);
mov
ax,[_s]
sub
ax,[_t]
push
ax
mov
ax,[_q]
add
ax,[_r]
mov
cx,-6[bp]
imul
cx
inc
sp
inc
sp
mov
[_p],ax
Finally, at the end of the C program, the stack is rebalanced as the registers SI,DI, and BP are popped.
pop
si
pop
di
pop
bp
ret
Consider the following program which will read a character
and convert it to uppercase if it was a lowercase
character:
[9:14pm linux01] cat > upcase1.c
char ch,out;
main() {
ch = getchar();
if (('a' <= ch) && (ch <= 'z'))
out = ch + 'A' - 'a';
else
out = ch;
putchar(out);
}
This program compiles to the following
essential assembler instructions:
        call    _getchar
        mov     [_ch],al
        mov     al,[_ch]
        cmp     al,*$61
        jb      .1
.3:
        mov     al,[_ch]
        cmp     al,*$7A
        ja      .1
.2:
        mov     al,[_ch]
        xor     ah,ah
        add     ax,*-$20
        mov     [_out],al
        jmp     .4
.1:
        mov     al,[_ch]
        mov     [_out],al
.4:
        mov     al,[_out]
        xor     ah,ah
        push    ax
        call    _putchar
[9:14pm linux01] !bc
bcc -S -o upcase1.s upcase1.c
[9:14pm linux01] cat upcase1.s
! 1
! 1 char ch,out;
! 2 main() {
export _main
_main:
push
bp
mov
bp,sp
push
di
push
si
call
_getchar
mov
[_ch],al
mov
al,[_ch]
cmp
al,*$61
jb
.1
.3:
mov
al,[_ch]
cmp
al,*$7A
ja
.1
.2:
mov
al,[_ch]
xor
ah,ah
add
ax,*-$20
mov
[_out],al
jmp .4
.1:
mov
al,[_ch]
mov
[_out],al
! 8 putchar(out);
.4:
mov
al,[_out]
xor
ah,ah
push
ax
call
_putchar
inc
sp
inc
sp
! 9 }
pop
si
pop
di
pop
bp
ret
.data
.bss
.comm
_ch,1
.comm
_out,1
The following ANSI C program makes use of `short hand'
notations, but only leads to slightly smaller machine code:
[9:17pm linux01] cat > upcase2.c
char ch;
main() {
putchar((('a'<= (ch=getchar()))&&(ch<='z')) ?
(ch+'A'-'a') : ch );
}
        call    _getchar
        mov     [_ch],al
        cmp     al,*$61
        jb      .1
.3:
        mov     al,[_ch]
        cmp     al,*$7A
        ja      .1
.2:
        mov     al,[_ch]
        xor     ah,ah
        add     ax,*-$20
        jmp     .4
.1:
        mov     al,[_ch]
        xor     ah,ah
.4:
        push    ax
        call    _putchar
[9:17pm linux01] cat > upcase2.c
char ch;
main() {
putchar((('a'<= (ch=getchar()))&&(ch<='z')) ? (ch+'A'-'a') : ch );
}
[9:17pm linux01] bcc -S -o upcase2.s upcase2.c
[9:17pm linux01] cat upcase2.s
! 1 char ch;
! 2 main() {
export _main
_main:
! 3
putchar((('a'<= (ch=getchar()))&&(ch<='z')) ?
(ch+'A'-'a') : ch );
push
bp
mov
bp,sp
push
di
push
si
call
_getchar
mov
[_ch],al
cmp
al,*$61
jb
.1
.3:
mov
al,[_ch]
cmp
al,*$7A
ja
.1
.2:
mov
al,[_ch]
xor
ah,ah
add
ax,*-$20
jmp .4
.1:
mov
al,[_ch]
xor
ah,ah
.4:
push
ax
call
_putchar
inc
sp
inc
sp
! 4 }
pop
si
pop
di
pop
bp
ret
.data
.bss
.comm
_ch,1
Consider the following ANSI C program in order
to explain the functionality of each of the lines:
char n;
int i,j=0;
main() {
n = 7&i;
n = ( ~0xf)^n;
n = sizeof(i);
for ( i=1; i != 0; i = i << 1 ) j++;
}
n = 7&i;
        mov     al,[_i]
        and     al,*7
        mov     [_n],al
n = ( ~0xf)^n;
        mov     ax,*-$10
        xor     al,[_n]
        mov     [_n],al
n = sizeof(i);
        mov     al,*2
        mov     [_n],al
for ( i=1; i != 0; i = i << 1 ) j++;
        mov     ax,*1
        mov     [_i],ax
        jmp     .3
.4:
        mov     ax,[_j]
        inc     ax
        mov     [_j],ax
.2:
        mov     ax,[_i]
        shl     ax,*1
        mov     [_i],ax
.3:
        mov     ax,[_i]
        test    ax,ax
        jne     .4
OPERATOR PRECEDENCE
precedence (highest first;
all associate left to right except as indicated)
=====================================================
scoping (C++ only)      ::
postfix                 function() array[] -> .   (C++: ++ --)
unary (RtoL)            ! ~ ++ -- - * & sizeof +
                        (and C++: new delete)
casting                 (type)
(C++ only)              .* ->*
arithmetic binary       * / %
arithmetic binary       + -
shift                   << >>
relational              < <= > >=
relational              == !=
bit ops     and         &
            ex or       ^
            or          |
logical                 &&
                        ||
(cond? T : F) (RtoL)    ?:
assignment (RtoL)       = += -= *= /= %= &= ^= |= <<= >>=
expression sequence     ,
======================================================
Notes:
"function()" refers to function invocation
"(type)" refers to type casting
unary +, - and * have higher precedence than
the corresponding binary operators
Note that because the precedence of bitwise
operators &, ^ and | is lower than that of the
equality operators (== and !=),
bit-testing expressions must be parenthesised
to give proper results:
e.g.
if ((value & BITMASK) == REQUIREDBITS) {...}
The comma between parameters of a function call is not an
operator and is different than the comma operator.
Unary + is not available in old C.
Example unary ops: *argv++ means *(argv++)
Example: *a[3] means *(a[3]) and *a.p means *(a.p)
More common, however, is
a->p which abbreviates (*a).p
Grouping () override precedence but do NOT force order of
evaluation. In old C
a+(b+c) could actually be executed as (a+b)+c. To force
order of evaluation
use: a+ +(b+c) or (t=b+c, a+t). This applies to
commutative and associative
operations: + * & ^ |
In ANSI the order of
evaluation is undefined
but combination must be done as brackets imply.
The precedence and associativity are natural except for
the bit and shift ops. Note especially that the unary
operations are very high and are all at the same level.
It could be argued that the bit ops should have been with
the arithmetic operators and that the shifts should be just
below the unary ops - treat these with caution; extra ()'s
may be wise. Examples:
x & y == 0
means x & (y == 0) whereas (x & y) == 0
is probably intended.
x<<4 + y
means x<< (4+y) whereas (x<<4) + y might
be intended.
The latter example also shows spacing does not imply
precedence.
++ -- and assignments are the only operators that change a
stored named value;
the others just change temporary/scratch values.
SEQUENCE POINTS
Sequence points in Ansi C: These are points at which
everything before this
point must be done before the compiler/machine can go on to
do stuff after.
1) at operators: unary +
&&
||
?:
, (the
comma-operator)
2) an expression must be completely evaluated before
proceeding if:
it is an ordinary statement, that is, everything
before a ";" or "}"
is done before proceeding.
it is the control expression in an: if, for, do,
while, switch.
it is an initializer of an auto object.
3) all parameters to a function are evaluated before the
function is called
and the return expression is completely evaluated before
the return taken.
Note that assignment is NOT a sequence point, thus
constructions like
a[i++] = i;
should never be used. Also bad: f(i++) +
g(i)
DECLARATIONS
Interpret declarations by reading from the name outward
using:
[N] = " ... array of size N, each of whose elements is ... "
(x) = " ... function (with parameters x) returning value ... "
      In Old C, x is not given.
      In Ansi C use "void" to indicate no parameters.
      A comma will always signal a parameter list.
*   = " ... pointer to ... "
      This has lowest precedence!
In C++, mainly for call by reference formal param.: & =
"...reference to..."
( ) may also indicate grouping. Unlike in other
contexts, extra ( )'s are
potentially harmful: extra grouping is NOT allowed
where
this may lead to ambiguity - see example below.
In casts the name is dropped and enclosing () are used; if
in doubt read from outside inward.
typedefs are strongly recommended for complicated
situations.
EXAMPLES:
int *g[N];
int *(g[N]);
/* array of
pointers */
g is an array of size N each of whose elements is a
pointer to int
int (*g)[N];
/* pointer to
an array */
g is a pointer to: an array of size N each of whose
elements is an int
int *f(void);
/* function
returning pointer */
f is a function with no parameters returning value a
pointer to int
int (*f)();
f is a pointer to a function with unspecified parameters
which returns an int
(int ())
a cast to a function returning an integer: obtained by
stripping
f from int f(); and then enclosing result in ( )
Note that int x; and int (x); are same declarations but
the latter has
unnecessary parentheses so the cast to int is: (int)
NOT (int ()) !!!
void (*signal(int,void(*)(int)))(int); /* very confusing
*/
Use a typedef instead:
typedef void (*HANDLER)(int);
/* define intermediate
type HANDLER */
HANDLER signal(int,HANDLER);
/* equivalent to the
above */
This says that HANDLER is a pointer to a function that
takes an
int value and returns void.
signal is a function that
takes an
int and a HANDLER and returns a HANDLER.
NOTES:
In expressions read * (dereferencing) as " ... value at
address ... "
and & (referencing)
as " ... address
of ... "
[12:11am linux01] cat > line1.c
int i=5;
char line[80];
main() {
line[i] = 7;
}
[12:11am linux01] bcc -S -o line1.s line1.c
[12:12am linux01] cat line1.s
! 1
! 1 int i=5;
.data
export _i
_i:
.word
5
! 2 char line[80];
! 3 main() {
.text
export _main
_main:
! 4
line[i] = 7;
push
bp
mov
bp,sp
push
di
push
si
mov
bx,[_i]
mov
al,*7
mov
_line[bx],al
! 5 }
pop
si
pop
di
pop
bp
ret
! 6
.data
.bss
.comm
_line,$50
! 0 errors detected
[12:13am linux01] cat > data2.c
int i=5,j=12;
char data[20][30];
main() {
data[i][j] = 99;
}
[12:13am linux01] bcc -S -o data2.s data2.c
[12:13am linux01] cat data2.s
! 1 int i=5,j=12;
.data
export _i
_i:
.word
5
export _j
_j:
.word
$C
! 2 char data[20][30];
! 3 main() {
.text
export _main
_main:
! 4 data[i][j] = 99;
push
bp
mov
bp,sp
push
di
push
si
mov
bx,[_i]
mov
ax,bx
mov
cx,*$1E
imul
cx
mov
bx,ax
mov
ax,[_j]
add
bx,ax
mov
al,*$63
mov
_data[bx],al
! 5 }
pop
si
pop
di
pop
bp
ret
! 6
.data
.bss
.comm
_data,$258
! 0 errors detected
int i,j[100],k;
main() {
for (i=0; i<=99; i++) k = k+j[i];
}
        xor     ax,ax
        mov     [_i],ax
        jmp     .3
.4:
        mov     bx,[_i]
        shl     bx,*1
        mov     ax,[_k]
        add     ax,_j[bx]
        mov     [_k],ax
.2:
        mov     ax,[_i]
        inc     ax
        mov     [_i],ax
.3:
        mov     ax,[_i]
        cmp     ax,*$63
        jle     .4
[12:07am linux01] cat simplearray.c
int i,j[100],k;
main() {
for (i=0; i<=99; i++) k = k+j[i];
}
[12:07am linux01] cat simplearray.s
! 1 int i,j[100],k;
! 2 main() {
export _main
_main:
! 3 for (i=0; i<=99; i++) k = k+j[i];
push
bp
mov
bp,sp
push
di
push
si
xor
ax,ax
mov
[_i],ax
jmp .3
.4:
mov
bx,[_i]
shl
bx,*1
mov
ax,[_k]
add
ax,_j[bx]
mov
[_k],ax
! 4 }
.2:
mov
ax,[_i]
inc
ax
mov
[_i],ax
.3:
mov
ax,[_i]
cmp
ax,*$63
jle
.4
.5:
.1:
pop
si
pop
di
pop
bp
ret
! 5
.data
.bss
.comm
_j,$C8
.comm
_i,2
.comm
_k,2
[4:34pm linux01] cat > simpleptr.c
int i, *ip, j;
main() {
ip = &i;
j = *ip + 5;
}
The two lines in the program are
equivalent to the following 8086 assembly:
        mov     bx,#_i
        mov     [_ip],bx
        mov     bx,[_ip]
        mov     bx,[bx]
        add     bx,*5
        mov     [_j],bx
In fact the whole program listing is as follows:
[4:38pm linux01] bcc -S -o simpleptr.s simpleptr.c
[4:39pm linux01] cat simpleptr.s
! 1 int i, *ip, j;
! 2 main() {
export _main
_main:
! 3 ip = &i;
push
bp
mov
bp,sp
push
di
push
si
mov
bx,#_i
mov
[_ip],bx
! 4 j = *ip + 5;
mov
bx,[_ip]
mov
bx,[bx]
add
bx,*5
mov
[_j],bx
! 5 }
pop
si
pop
di
pop
bp
ret
.data
.bss
.comm
_j,2
.comm
_i,2
.comm
_ip,2
What do these lines mean?
int i,j, *ip;
main() {
ip = &i;
j= j + *ip;
ip++;
((void *) ip)++;
((long int *) ip)++;
((int) ip)++;
}
char c1,c[100],*cp;
int i;
main() {
cp = c;
cp = &c[0];
c1 = c[i];
c1 = *(cp+i);
c1 = i[c];
c1 = "abcdef"[i];
}
[12:20am linux01] cat > interesting.c
char c1,c[100],*cp;
int i;
main() {
cp = c;
cp = &c[0];
c1 = c[i];
c1 = *(cp+i);
c1 = i[c];
c1 = "abcdef"[i];
}
[12:26am linux01] bcc -S -o interesting.s interesting.c
[12:26am linux01] cat interesting.s
! 1
! 1 char c1,c[100],*cp;
! 2 int i;
! 3 main() {
export _main
_main:
! 4 cp = c;
push
bp
mov
bp,sp
push
di
push
si
mov
bx,#_c
mov
[_cp],bx
! 5 cp = &c[0];
mov
bx,#_c
mov
[_cp],bx
! 6 c1 = c[i];
mov
bx,[_i]
mov
al,_c[bx]
mov
[_c1],al
! 7 c1 = *(cp+i);
mov
ax,[_i]
add
ax,[_cp]
mov
bx,ax
mov
al,[bx]
mov
[_c1],al
! 8 c1 = i[c];
mov
bx,[_i]
mov
al,_c[bx]
mov
[_c1],al
! 9 c1 = "abcdef"[i];
mov
bx,[_i]
mov
al,.1[bx]
mov
[_c1],al
! 10 }
pop
si
pop
di
pop
bp
ret
! 11
.data
.1:
.2:
.ascii
"abcdef"
.byte
0
.bss
.comm
_cp,2
.comm
_i,2
.comm
_c1,1
.comm
_c,$64
! 0 errors detected
[12:26am linux01]
[12:20am linux01] cat > interesting.c
char c1,c[100],*cp;
int i;
main() {
cp = c;
cp = &c[0];
c1 = c[i];
c1 = *(cp+i);
c1 = i[c];
c1 = "abcdef"[i];
}
cp = c;
        mov     bx,#_c
        mov     [_cp],bx
cp = &c[0];
        mov     bx,#_c
        mov     [_cp],bx
c1 = *(cp+i);
        mov     ax,[_i]
        add     ax,[_cp]
        mov     bx,ax
        mov     al,[bx]
        mov     [_c1],al
c1 = c[i];
        mov     bx,[_i]
        mov     al,_c[bx]
        mov     [_c1],al
c1 = i[c];
        mov     bx,[_i]
        mov     al,_c[bx]
        mov     [_c1],al
c1 = "abcdef"[i];
        mov     bx,[_i]
        mov     al,.1[bx]
        mov     [_c1],al
.1:
        .ascii  "abcdef"
        .byte   0
.bss
        .comm   _cp,2
        .comm   _i,2
        .comm   _c1,1
        .comm   _c,$64
Download