You can create ANSI C programs on the UWO Engineering Linux server. (ssh login to linux01.eng.uwo.ca) [10:00pm linux01] cat > distrib.c int p,q,r,s,t; main() { p=(q+r)*(s-t); } Programs can be cross-compiled for 8086 machines using bcc: [10:02pm linux01] bcc -S -o distrib.s distrib.c bcc produces human-readable assembler output when the -S switch is used. You may cat the file: [10:02pm linux01] cat distrib.s ! 1 ! 1 int p,q,r,s,t; ! 2 main() { export _main _main: ! 3 p=(q+r)*(s-t); push bp mov bp,sp push di push si mov ax,[_s] sub ax,[_t] push ax mov ax,[_q] add ax,[_r] mov cx,-6[bp] imul cx inc sp inc sp mov [_p],ax ! 4 } pop si pop di pop bp ret ! 5 .data .bss .comm _t,2 .comm _q,2 .comm _s,2 .comm _p,2 .comm _r,2 Here is an explanation of the .s listing: The integer declarations cause the assembler to allocate two-byte variables in the common block: int p,q,r,s,t; .data .bss .comm _t,2 .comm _q,2 .comm _s,2 .comm _p,2 .comm _r,2 The main program assembles as a symbol, which begins by saving the BP register on the stack, and resetting BP to the current stack pointer. Then, DI and SI are pushed, using up two 2-byte words: export _main _main: main() { push bp mov bp,sp push di push si The expression is implemented by first computing (s-t) and pushing that result onto the stack. Subsequently, it is retrieved from this location (-6 bytes from where the base pointer BP points). This temporary 2-byte word is deallocated from the stack by incrementing SP twice. Finally, the result of the operations is stored in the variable (int p) which the assembler names _p. p=(q+r)*(s-t); mov ax,[_s] sub ax,[_t] push ax mov ax,[_q] add ax,[_r] mov cx,-6[bp] imul cx inc sp inc sp mov [_p],ax Finally, at the end of the C program, the stack is rebalanced as the registers SI, DI, and BP are popped. 
pop si pop di pop bp ret Consider the following program which reads a character and converts it to uppercase if it is a lowercase letter: [9:14pm linux01] cat > upcase1.c char ch,out; main() { ch = getchar(); if (('a' <= ch) && (ch <= 'z')) out = ch + 'A' - 'a'; else out = ch; putchar(out); } This program compiles to the following essential assembler instructions: call _getchar mov [_ch],al mov al,[_ch] cmp al,*$61 jb .1 .3: mov al,[_ch] cmp al,*$7A ja .1 .2: mov al,[_ch] xor ah,ah add ax,*-$20 mov [_out],al jmp .4 .1: mov al,[_ch] mov [_out],al .4: mov al,[_out] xor ah,ah push ax call _putchar [9:14pm linux01] !bc bcc -S -o upcase1.s upcase1.c [9:14pm linux01] cat upcase1.s ! 1 ! 1 char ch,out; ! 2 main() { export _main _main: push bp mov bp,sp push di push si call _getchar mov [_ch],al mov al,[_ch] cmp al,*$61 jb .1 .3: mov al,[_ch] cmp al,*$7A ja .1 .2: mov al,[_ch] xor ah,ah add ax,*-$20 mov [_out],al jmp .4 .1: mov al,[_ch] mov [_out],al ! 8 putchar(out); .4: mov al,[_out] xor ah,ah push ax call _putchar inc sp inc sp ! 9 } pop si pop di pop bp ret .data .bss .comm _ch,1 .comm _out,1 The following ANSI C program makes use of `short hand' notations, but only leads to slightly smaller machine code: [9:17pm linux01] cat > upcase2.c char ch; main() { putchar((('a'<= (ch=getchar()))&&(ch<='z')) ? (ch+'A'-'a') : ch ); } call _getchar mov [_ch],al cmp al,*$61 jb .1 .3: mov al,[_ch] cmp al,*$7A ja .1 .2: mov al,[_ch] xor ah,ah add ax,*-$20 jmp .4 .1: mov al,[_ch] xor ah,ah .4: push ax call _putchar [9:17pm linux01] cat > upcase2.c char ch; main() { putchar((('a'<= (ch=getchar()))&&(ch<='z')) ? (ch+'A'-'a') : ch ); } [9:17pm linux01] bcc -S -o upcase2.s upcase2.c [9:17pm linux01] cat upcase2.s ! 1 char ch; ! 2 main() { export _main _main: ! 3 putchar((('a'<= (ch=getchar()))&&(ch<='z')) ? 
(ch+'A'-'a') : ch ); push bp mov bp,sp push di push si call _getchar mov [_ch],al cmp al,*$61 jb .1 .3: mov al,[_ch] cmp al,*$7A ja .1 .2: mov al,[_ch] xor ah,ah add ax,*-$20 jmp .4 .1: mov al,[_ch] xor ah,ah .4: push ax call _putchar inc sp inc sp ! 4 } pop si pop di pop bp ret .data .bss .comm _ch,1 Consider the following ANSI C program in order to explain the functionality of each of the lines: char n; int i,j=0; main() { n = 7&i; n = ( ~0xf)^n; n = sizeof(i); for ( i=1; i != 0; i = i << 1 ) j++; } n = 7&i; mov al,[_i] and al,*7 mov [_n],al n = ( ~0xf)^n; mov ax,*-$10 xor al,[_n] mov [_n],al n = sizeof(i); mov al,*2 mov [_n],al for ( i=1; i != 0; i = i << 1 ) j++; mov ax,*1 mov [_i],ax jmp .3 .4: mov ax,[_j] inc ax mov [_j],ax .2: mov ax,[_i] shl ax,*1 mov [_i],ax .3: mov ax,[_i] test ax,ax jne .4 OPERATOR PRECEDENCE precedence (highest first; all associate left to right except as indicated) ===================================================== scoping (C++ only) :: postfix function() array[] -> . (C++: ++ --) unary (RtoL) ! ~ ++ -- - * & sizeof + (and C++: new delete) casting (type) (C++ only) .* ->* arithmetic binary * / % arithmetic binary + - shift << >> relational < <= > >= relational == != bit ops and & ex or ^ or | logical && || (cond? T : F) (RtoL) ?: assignment(RtoL) = += -= *= /= %= &= ^= |= <<= >>= expression sequence , ====================================================== Notes: "function()" refers to function invocation "(type)" refers to type casting unary +, - and * have higher precedence than the corresponding binary operators Note that because the precedence of bitwise operators &, ^ and | is lower than that of the equality operators (== and !=), bit-testing expressions must be parenthesised to give proper results: e.g. if ((value & BITMASK) == REQUIREDBITS) {...} The comma between parameters of a function call is not an operator and is different from the comma operator. Unary + is not available in old C. 
Example unary ops: *argv++ means *(argv++) Example: *a[3] means *(a[3]) and *a.p means *(a.p) More common, however, is a->p which abbreviates (*a).p Grouping () overrides precedence but does NOT force order of evaluation. In old C a+(b+c) could actually be executed as (a+b)+c. To force order of evaluation use: a+ +(b+c) or (t=b+c, a+t). This applies to commutative and associative operations: + * & ^ | In ANSI the order of evaluation is unspecified but combination must be done as brackets imply. The precedence and associativity are natural except for the bit and shift ops. Note especially that the unary operations are very high and are all at the same level. It could be argued that the bit ops should have been grouped with the arithmetic ops and the shifts placed just below the unary ops - treat these with caution; extra ()'s may be wise. Examples: x & y == 0 means x & (y == 0) whereas (x & y) == 0 is probably intended. x<<4 + y means x<< (4+y) whereas (x<<4) + y might be intended. The latter example also shows that spacing does not imply precedence. ++ -- and assignments are the only operators that change a stored named value; the others just change temporary/scratch values. SEQUENCE POINTS Sequence points in Ansi C: A sequence point is a point at which everything before it must be complete before the compiler/machine can go on to anything after it. 1) at operators: unary + && || ?: , (the comma-operator) 2) an expression must be completely evaluated before proceeding if: it is an ordinary statement, that is, everything before a ";" or "}" is done before proceeding. it is the control expression in an: if, for, do, while, switch. it is an initializer of an auto object. 3) all parameters to a function are evaluated before the function is called and the return expression is completely evaluated before the return is taken. Note that assignment is NOT a sequence point, so constructions like a[i++] = i; should never be used. Also bad: f(i++) + g(i) DECLARATIONS Interpret declarations by reading from the name outward using:
[N] = " ... array of size N, each of whose elements is ... " (x) = " ... function (with parameters x) returning value ... " In Old C, x is not given. In Ansi C use "void" to indicate no parameters. A comma will always signal a parameter list. * = " ... pointer to ... " This has lowest precedence! In C++, mainly for call by reference formal param.: & = "...reference to..." ( ) may also indicate grouping. Unlike in other contexts, extra ( )'s are potentially harmful: extra grouping is NOT allowed where this may lead to ambiguity - see example below. In casts the name is dropped and enclosing () are used; if in doubt read from outside inward. typedefs are strongly recommended for complicated situations. EXAMPLES: int *g[N]; int *(g[N]); /* array of pointers */ g is an array of size N each of whose elements is a pointer to int int (*g)[N]; /* pointer to an array */ g is a pointer to: an array of size N each of whose elements is an int int *f(void); /* function returning pointer */ f is a function with no parameters returning value a pointer to int int (*f)(); f is a pointer to a function with unspecified parameters which returns an int (int ()) a cast to a function returning an integer: obtained by stripping f from int f(); and then enclosing result in ( ) Note that int x; and int (x); are the same declaration but the latter has unnecessary parentheses so the cast to int is: (int) NOT (int ()) !!! void (*signal(int,void(*)(int)))(int); /* very confusing */ Use a typedef instead: typedef void (*HANDLER)(int); /* define intermediate type HANDLER */ HANDLER signal(int,HANDLER); /* equivalent to the above */ This says that HANDLER is a pointer to a function that takes an int value and returns void. signal is a function that takes an int and a HANDLER and returns a HANDLER. NOTES: In expressions read * (dereferencing) as " ... value at address ... " and & (referencing) as " ... address of ... 
" [12:11am linux01] cat > line1.c int i=5; char line[80]; main() { line[i] = 7; } [12:11am linux01] bcc -S -o line1.s line1.c [12:12am linux01] cat line1.s ! 1 ! 1 int i=5; .data export _i _i: .word 5 ! 2 char line[80]; ! 3 main() { .text export _main _main: ! 4 line[i] = 7; push bp mov bp,sp push di push si mov bx,[_i] mov al,*7 mov _line[bx],al ! 5 } pop si pop di pop bp ret ! 6 .data .bss .comm _line,$50 ! 0 errors detected [12:13am linux01] cat > data2.c int i=5,j=12; char data[20][30]; main() { data[i][j] = 99; } [12:13am linux01] bcc -S -o data2.s data2.c [12:13am linux01] cat data2.s ! 1 int i=5,j=12; .data export _i _i: .word 5 export _j _j: .word $C ! 2 char data[20][30]; ! 3 main() { .text export _main _main: ! 4 data[i][j] = 99; push bp mov bp,sp push di push si mov bx,[_i] mov ax,bx mov cx,*$1E imul cx mov bx,ax mov ax,[_j] add bx,ax mov al,*$63 mov _data[bx],al ! 5 } pop si pop di pop bp ret ! 6 .data .bss .comm _data,$258 ! 0 errors detected int i,j[100],k; main() { for (i=0; i<=99; i++) k = k+j[i]; } xor ax,ax mov [_i],ax jmp .3 .4: mov bx,[_i] shl bx,*1 mov ax,[_k] add ax,_j[bx] mov [_k],ax .2: mov ax,[_i] inc ax mov [_i],ax .3: mov ax,[_i] cmp ax,*$63 jle .4 [12:07am linux01] cat simplearray.c int i,j[100],k; main() { for (i=0; i<=99; i++) k = k+j[i]; } [12:07am linux01] cat simplearray.s ! 1 int i,j[100],k; ! 2 main() { export _main _main: ! 3 for (i=0; i<=99; i++) k = k+j[i]; push bp mov bp,sp push di push si xor ax,ax mov [_i],ax jmp .3 .4: mov bx,[_i] shl bx,*1 mov ax,[_k] add ax,_j[bx] mov [_k],ax ! 4 } .2: mov ax,[_i] inc ax mov [_i],ax .3: mov ax,[_i] cmp ax,*$63 jle .4 .5: .1: pop si pop di pop bp ret ! 
5 .data .bss .comm _j,$C8 .comm _i,2 .comm _k,2 [4:34pm linux01] cat > simpleptr.c int i, *ip, j; main() { ip = &i; j = *ip + 5; } The two statements in the program are equivalent to the following 8086 assembly: mov bx,#_i mov [_ip],bx mov bx,[_ip] mov bx,[bx] add bx,*5 mov [_j],bx In fact the whole program listing is as follows: [4:38pm linux01] bcc -S -o simpleptr.s simpleptr.c [4:39pm linux01] cat simpleptr.s ! 1 int i, *ip, j; ! 2 main() { export _main _main: ! 3 ip = &i; push bp mov bp,sp push di push si mov bx,#_i mov [_ip],bx ! 4 j = *ip + 5; mov bx,[_ip] mov bx,[bx] add bx,*5 mov [_j],bx ! 5 } pop si pop di pop bp ret .data .bss .comm _j,2 .comm _i,2 .comm _ip,2 What do these lines mean? int i,j, *ip; main() { ip = &i; j= j + *ip; ip++; ((void *) ip)++; ((long int *) ip)++; ((int) ip)++; } char c1,c[100],*cp; int i; main() { cp = c; cp = &c[0]; c1 = c[i]; c1 = *(cp+i); c1 = i[c]; c1 = "abcdef"[i]; } [12:20am linux01] cat > interesting.c char c1,c[100],*cp; int i; main() { cp = c; cp = &c[0]; c1 = c[i]; c1 = *(cp+i); c1 = i[c]; c1 = "abcdef"[i]; } [12:26am linux01] bcc -S -o interesting.s interesting.c [12:26am linux01] cat interesting.s ! 1 ! 1 char c1,c[100],*cp; ! 2 int i; ! 3 main() { export _main _main: ! 4 cp = c; push bp mov bp,sp push di push si mov bx,#_c mov [_cp],bx ! 5 cp = &c[0]; mov bx,#_c mov [_cp],bx ! 6 c1 = c[i]; mov bx,[_i] mov al,_c[bx] mov [_c1],al ! 7 c1 = *(cp+i); mov ax,[_i] add ax,[_cp] mov bx,ax mov al,[bx] mov [_c1],al ! 8 c1 = i[c]; mov bx,[_i] mov al,_c[bx] mov [_c1],al ! 9 c1 = "abcdef"[i]; mov bx,[_i] mov al,.1[bx] mov [_c1],al ! 10 } pop si pop di pop bp ret ! 11 .data .1: .2: .ascii "abcdef" .byte 0 .bss .comm _cp,2 .comm _i,2 .comm _c1,1 .comm _c,$64 ! 
0 errors detected [12:26am linux01] [12:20am linux01] cat > interesting.c char c1,c[100],*cp; int i; main() { cp = c; cp = &c[0]; c1 = c[i]; c1 = *(cp+i); c1 = i[c]; c1 = "abcdef"[i]; } cp = c; mov bx,#_c mov [_cp],bx cp = &c[0]; mov bx,#_c mov [_cp],bx c1 = *(cp+i); mov ax,[_i] add ax,[_cp] mov bx,ax mov al,[bx] mov [_c1],al c1 = c[i]; mov bx,[_i] mov al,_c[bx] mov [_c1],al c1 = i[c]; mov bx,[_i] mov al,_c[bx] mov [_c1],al c1 = "abcdef"[i]; mov bx,[_i] mov al,.1[bx] mov [_c1],al .1: .ascii "abcdef" .byte 0 .bss .comm _cp,2 .comm _i,2 .comm _c1,1 .comm _c,$64