Document 12961167

advertisement
!," $'*% $
$% $# (!'",## %$&&$%&
With a combination of software and hardware optimizations, including the
availability of PA-RISC multimedia instructions, a software video player
running on a low-end workstation is able to play MPEG compressed video
at 30 frames/s.
+ (+ $# $! " # ##' )%&$#
Traditionally, computers have improved productivity by
helping people compute faster and more accurately. Today,
computers can further improve productivity by helping
people communicate better and more naturally. Towards this
end, at Hewlett-Packard we have looked for more natural
ways to integrate communication power into our desktop
machines, which would allow a user to access distributed
information more easily and communicate with other users
more readily.
We felt that adding audio, images, and video information
would enrich the information media of text and graphics
normally available on desktop computers such as workstations
and personal computers. However, for such enriched
multimedia communications to be useful, it must be
fully integrated into the user's normal working environment.
Hence, as the technology matured, we decided to integrate
increasing levels of multimedia support into both the user
interface and the basic hardware platform.
2 8*617 3+ 97*6 .28*6+&(* ;* .28*,6&8*) & 4&2*0 3+ 1908.?
1*).& .(327 .283 8-* #" 78&2)&6) ,6&4-.(&0 97*6 .28*6?
+&(* ;-.(- (31*7 ;.8- &00 ;36/78&8.327 !-*7* 1908.?
1*).& .(327 &6* 4&68 3+ 8-* 3;*6 463)9(8 3;*6 *2&'0*7 & ;36/78&8.32 97*6 83 6*(*.:* &2) 7*2)
+&<*7 7-&6* 46.28*67 &((*77 &2) 1&2.490&8* .1&,*7 -*&6 &2)
7*2) :3.(* &2) ?59&0.8= 78*6*3 &9).3 7*2) &2) 6*(*.:*
1908.1*).& *1&.0 7-&6* &2 % ;.2)3; 36 &2 *0*(8632.( ;-.8*?
'3&6) ;.8- 38-*6 ).786.'98*) 97*67 &2) (&4896* &2) 40&= '&(/
:.)*3 7*59*2(*7 !-* 3;*6 73+8;&6* .7 '&7*) 32 &
(0.*287*6:*6 13)*0 .2 ;-.(- 32* 7*6:*6 (&2 7*6:.(* &6392)
(0.*287 ;-.(- (&2 '* ;36/78&8.327 36 % 8*61.2&07
2 8*617 3+ -&6);&6* 40&8+3617 ;* .28*,6&8*) 79((*77.:*
0*:*07 3+ 1908.1*).& 7944368 .283 8-* '&7*0.2* ? ;36/?
78&8.327
.678 ;* .28*,6&8*) 7944368 +36 &00 8-* 43490&6
.1&,* +361&87 79(- &7 3.28 -383,6&4-.( <4*687
6394† (3146*77*) .1&,*7 !-*2 ;* &))*) -&6);&6*
&2) 73+8;&6* 7944368 +36 &9).3 78&68.2, ;.8- ?/> :3.(*?
59&0.8= &9).3 +3003;*) '= 7944368 +36 291*6397 &9).3 +36?
1&87 .2(09).2, ?0&; ?0&; &2) ?'.8 0.2*&6 13)* ;.8- 94
83 ?/> 1323 &2) 78*6*3 !-.7 &003;*) -.,-?+.)*0.8=
?/> 78*6*3 ?'.8 ?59&0.8= &9).3 83 '* 6*(36)*)
1&2.490&8*) &2) 40&=*) '&(/ 32 ;36/78&8.327 8 8-*
7&1* 8.1* ;* 7944368*) 92(3146*77*) :.)*3 (&4896* &2)
40&='&(/
2 &29&6= .2863)9(*) 3;*6 &2) 8-*
*286=?0*:*0 *28*646.7* ;36/78&8.32 8-* 3)*0 ;-.(- .7 '&7*) 32 8-* 1908.1*).&?*2-&2(*) ? 463(*7?
736 /23;2 &7 8-* !-* :.)*3 40&=*6 .28*,6&8*)
.2 8-* 3;*6 463)9(8 .7 8-* +.678 463)9(8 8-&8 &(-.*:*7
6*&0?8.1* ? 3:.2, .(896* <4*687 6394 :.)*3
)*(3146*77.32 :.& 73+8;&6* 6922.2, 32 & ,*2*6&0?496437*
463(*7736 !=4.(&00= 6*&0?8.1* ? )*(3146*77.32 .7
&(-.*:*) :.& 74*(.&0?496437* (-.47 36 '3&6)7 6*:.397 &8?
8*1487 &8 73+8;&6* ? )*(3146*77.32 ).) 238 &88&.2
6*&0?8.1* 6&8*7 !-* +&(8 8-&8 8-.7 .7 &(-.*:*) '= 8-* 03;?
*2) 3)*0 ;36/78&8.32 .7 7.,2.+.(&28
2 8-.7 4&4*6 ;* ).7(977 8-* 7944368 3+ ?(3146*77*)
:.)*3 &7 & 2*; :.)*3 )&8& 8=4* 2 4&68.(90&6 ;* ).7(977
8-* 8*(-2303,= 8-&8 *2&'0*7 8-* :.)*3 40&=*6 .28*,6&8*) .283
8-* 3;*6 463)9(8 83 40&= '&(/ ?(3146*77*)
:.)*3 &8 6*&0?8.1* 6&8*7 3+ 94 83 +6&1*7 4*6 7*(32)
'! $ '#%&
We decided to focus on the MPEG digital video format because
it is an International Standards Organization
standard and it gives the highest video fidelity at a given
compression ratio of any of the formats that we evaluated.
MPEG also has broad support from the consumer electronics,
telecommunications, cable, and computer industries. The
high compression capability of MPEG translates into lower
storage costs and less bandwidth needed for transmitting
video on the network. These characteristics make MPEG an
ideal format for addressing the need for detail in the video
used in technical workstation markets and computer-based
training in commercial workstation markets.
MPEG is one of several algorithmically related standards
shown in Fig. 1. All of these digital video compression standards
use the discrete cosine transform (DCT) as a fundamental
component of the algorithm. Alternatives to discrete
cosine-based algorithms that we looked at include vector
quantization, fractals, and wavelets. Vector quantization
† JPEG is an international digital image compression standard for continuous-tone (multilevel)
still images (grayscale and color).
April 1995 Hewlett-Packard Journal
© Hewlett-Packard Company 1995
MPEG-2
International
Video
Standards
MPEG-1
Nonstandard
Video Format
H.261(P*64),
H.320
Motion JPEG
JPEG
International
Image Standard
Fig. 1. Digital video standards based on the discrete cosine
transform.
algorithms are popular on older computer architectures because
they require less computing power to decompress, but
this advantage is offset by poorer image quality at low bandwidth
(high compression) compared to MPEG for practical
vector quantization methods. Algorithms based on wavelet
and fractal technology have the potential to deliver video
fidelity comparable to MPEG, but there is presently a lack of
industry consensus on standardization, a key requirement for
our use.
349-*7 &);&39&,* 4+ & -.,-A5*7+472&3(* .251*2*39&9.43 4+
.8 9-* &'.1.9> 94 1*;*7&,* 9-* .2574;*2*398 94 9-*
49-*7 "A'&8*) &1,47.9-28 19-4:,- 9-* 7*1&9.438-.58
8-4<3 .3 ., )4 349 7*57*8*39 & 97:* -.*7&7(-> 4+ &1,4A
7.9-28 .8 :8*+:1 +47 .11:897&9.3, .3(7*&8*) (4251*=.9> &8 43*
24;*8 +742 94 A 47 +742 94 A
11 4+ 9-*8* +472&98 -&;* 2:(- .3 (42243 8:(- &8 9-* :8*
4+ 9-* " +47 *3(4).3, "-* ;.8:&1 +.)*1.9> 4+ 9-* &1,47.9-28
<&8 9-* 0*> 8*1*(9.43 (7.9*7.43 &3) 349 *&8* 4+ .251*2*39&A
9.43 47 5*7+472&3(* 43 *=.89.3, -&7)<&7*
19-4:,- 8:554798 '49- 1488> &3) 14881*88 (4257*8A
8.43 9-* 9*72 .8 9>5.(&11> &884(.&9*) <.9- 9-* 1488>
85*(.+.(&9.43† "-* 57.2&7> ,4&1 4+ .8 94 &(-.*;* -.,(4257*88.43 4+ 5-494,7&5-.( .2&,*8 <.9- 1.991* 5*7(*.;*)
1488 4+ .2&,* +.)*1.9> 19-4:,- .9 .8 349 &3 ! 89&3)&7) '>
(43;*39.43 & 8*6:*3(* 4+ 1488> .2&,*8 94 (7*&9* &
).,.9&1 ;.)*4 8*6:*3(* .8 (&11*) 249.43 47 .8 & ).,.9&1 ;.)*4 89&3)&7) +742 9-* 9*1*(422:3.(&A
9.438 89&3)&7)8 '4)> "#A"!! +472*71> 034<3 &8 ""
.8 43* 4+ & 8:.9* 4+ (43+*7*3(.3, 89&3)&7)8 9-&9 2&0*
:5 9-* :2'7*11& 85*(.+.(&9.43 .8 4+9*3 7*+*77*)
94 &8 <-*7* .8 &3 .39*,*7 '*(&:8* .9 <&8 )*8.,3*) 94
+.9 .394 2:19.51*8 4+ 0'.988 '&3)<.)9- "-* +.789 +7&2*
† In lossless compression, decompressed data is identical to the original image data. In lossy
compression, decompressed data is a good approximation of the original image data.
Bandwidth (kbits/s)
10,000
MPEG-2
3,000
MPEG-2 contains additional specifications and is a superset
of MPEG-1. The new features in MPEG-2 are targeted at
broadcast television requirements, such as support for frame
interleaving similar to analog broadcast techniques. With widespread deployment of MPEG-2, the digital revolution for
video may be comparable to the digital audio revolution of
the last decade.
"-* &5574=.2&9* '&3)<.)9-8 7*6:.7*) 94 &(-.*;* & 1*;*1 4+
8:'/*(9.;* ;.8:&1 +.)*1.9> +47 249.43 A
&3) A &7* 8-4<3 .3 ., 49.43 <.11 57.2&7.1>
'* :8*) +47 (&8*8 .3 <-.(- &((:7&9* +7&2* *).9.3, .8 .2547A
9&39 8:(- &8 ;.)*4 *).9.3, <.11 '* :8*) 57.2&7.1> +47
;.)*4 (43+*7*3(.3, ':9 .9 &184 -&8 549*39.&1 +47 :8* .3 ;.)*4
2&.1 A &3) A <.11 '* :8*) +47 5:'1.8-.3,
<-*7* +.)*1.9> *=5*(9&9.438 -&;* '**3 8*9 '> (438:2*7 &3&A
14, ;.)*4 9&5*8 (425:9*7A'&8*) 97&.3.3, ,&2*8 24;.*8 43
&3) ;.)*4 43 )*2&3)
-&8 9<4 (1&88*8 4+ +7&2*8 .397&(4)*) &3) 343A
.397&(4)*) +7&2*8 8** ., 397&(4)*) +7&2*8 &184 (&11*)
&7* (4257*88*) '> 7*):(.3, 85&9.&1 7*):3)&3(>
<.9-.3 9-* +7&2* .98*1+ A+7&2*8 )4 349 )*5*3) 43 (425&7.A
8438 <.9- 5&89 47 @7*+*7*3(* +7&2*8 "-*> :8* A9>5*
(4257*88.43 +47 89.11 .2&,*8
Production and Broadcast
Computer-Based Training,
Analysis, and Monitoring
MPEG-1
1,000
"-* A 85*(.+.(&9.43 ,4*8 *;*3 +:79-*7 9-&3 .3
&114<.3, 845-.89.(&9*) 9*(-3.6:*8 94 &(-.*;* -.,- +.)*1.9>
<.9- +*<*7 '.98 3 &)).9.43 94 +47<&7) ).++*7*3(.3, A
&114<8 '&(0<&7) ).++*7*3(.3, <-.(- 7*1.*8 43 .3+472&9.43 .3
& +:9:7* +7&2* &3) &;*7&,.3, 4+ .2&,* +7&,2*398 47<&7)
&3) '&(0<&7) ).++*7*3(.3, &7* )*8(7.'*) .3 247* )*9&.1 .3
9-* 3*=9 8*(9.43 A &(-.*;*8 6:&1.9> (425&7&'1* 94 &
574+*88.43&11> 7*574):(*) $! ;.)*49&5* *;*3 &9 & 8.3,1*A
85**) A )&9& 7&9* '.988 A &184
85*(.+.*8 *3(4).3,8 +47 -.,-+.)*1.9> &:).4 8>3(-743.?*) <.99-* ;.)*4
Uses
Motion JPEG
30,000
.2&,* 4+ &3 8*6:*3(* .8 +47 &11 57&(9.(&1 5:7548*8 &
-.,-1> (4257*88*) 1488> .2&,* !:'8*6:*39 +7&2*8 &7*
':.19 +742 .2&,* +7&,2*398 '14(08 9-&9 &7* *.9-*7 A1.0*
47 &7* ).++*7*3(*8 +742 9-* .2&,* +7&,2*398 .3 57*;.4:8
+7&2*8 489 ;.)*4 8*6:*3(*8 -&;* -.,- +7&2*A94A+7&2* (4A
-*7*3(* "-.8 .8 *85*(.&11> 97:* +47 ;.)*4 (43+*7*3(.3, *A
(&:8* 9-* *3(4).3, 4+ 9-* 24;*2*39 4+ & 5.*(* 4+ &3 .2&,*
7*6:.7*8 1*88 )&9& 9-&3 &3 *6:.;&1*39 +7&,2*39 &(-.*;*8 -.,-*7 ;.8:&1 +.)*1.9> +47 & ,.;*3 '&3)<.)9- 9-&3
)4*8 249.43 !.3(* 9-* *3(4).3, 4+ 9-* ).++*7*3(*8 .8
&1<&>8 '&8*) 43 9-* 57*;.4:8 +7&2*8 9-* 9*(-3.6:* .8 (&11*)
H.261
300
100
Conferencing and Video Mail
30
Ideal Format
10
Poor
Adequate
Good
Very Good
Visual Fidelity
© Hewlett-Packard Company 1995
Excellent
Exceptional
Fig. 2. Compressed video bandwidth
versus subjective visual
fidelity. The ideal format achieves
exceptional visual fidelity at the
lowest bandwidth.
April 1995 Hewlett-Packard Journal
Forward Prediction
I
B
B
P
B
B
P
B
B
P
B
B
P
B
B
I
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
Bidirectional Prediction
I = Intracoded Frame (Like JPEG)
P = Predicted Frame
B = Bidirectionally Predicted Frame
)($(.,) !,' - , !/,.# , $0$ $(.) (
5!,' - , *, $. !,' - - )( )'*,$5
-)(- 1$.# ( ,&$ , , ! , ( !,' ( $(.,) ), *, 5
$. !,' 3 )(-$ ,$(" . '*),& , /((3 $( $5
.$)( .) -*.$& , /((3 5!,' - ( () 1$.#
! 1 , $.- 5!,' - , $$, .$)(&&3 *, $. !,' - .#.
, +/$, )( %1, , ! , ( !,' ( )( !),1, , !5
, ( !,' !), *, $.$)( , ! , ( !,' ( (
5!,' ), 5!,' /. (). 5!,' 3 . .$(" .#
').$)( )! &)%- !,)' ).# !,' .#. )/,, ,&$ , (
!,' .#. 1$&& *&3 % &. , $( .# 0$ )
- +/ ( 5!,' - ( () $( ! 1 , $.- .#( 5 ),
5!,' -
# !,' $- $0$ $(.) ',)&)%- )! 3 *$2 &!), .# */,*)- - )! ').$)( -.$'.$)(† $( )'*, --$)(
( ').$)( )'* (-.$)( $( )'*, --$)( !,'
1$.# )(&3 5&)%- $- ( 5!,' 1# , - 5!,' #- 5
&)%- ), 5&)%- ( 5!,' #- 5&)%- 5&)%- ),
5&)%- ), # 5&)% $( .# /,, (. !,' .# &)% $(
.# , ! , ( !,' .#. '.# - $. -. $- $ (.$!$ 3 ').$)( 0 .), # ( .# $!! , ( - .1 ( .# *$2 & 0&/ $( .# '.#$(" &)% $( .# , ! , ( !,' ( .# /,, (.
&)% $( .# /,, (. !,' , () 3 $-, . )-$(
.,(-!),'
# )&), -* /- $- .# , )&), , *, - (..$)(
,.# , .#( .# )&), -* 1# , , *, - (.- .#
&/'$(( ), ,$"#.( -- )'*)( (. ( ( , , *, 5
- (. .# #,)'$(( ), )&), )'*)( (.- /#/'( * , *.$)( $- '), - (-$.$0 .) &/'$(( .#( .)
#,)'$(( .# ( , )'*)( (.- ( -/-'*& $( ).# .# 2 ( 3 $' (-$)(- #$- ' (- .#. .# , $- )(
0&/ ( )( , 0&/ !), 0 ,3 !)/, 0&/ - ( 535 ',)&)% )(.$(- !)/, 535
&)%- )! (
)(&3 )( 535
&)% )! ( )( 535
&)% )! ,
0&/ - - $" #$- $- , /.$)( !,)' .# .1 &0
535
&)%- !)/, !), # )! .# .#, )&), )'*)( (.-
† Motion estimation uses temporal redundancy to estimate the movement of a block from one
frame to the next.
April 1995 Hewlett-Packard Journal
Fig. 3. MPEG frame sequencing.
$! ( , 1 , (). -/-'*& # -$2 535
&)%- $(
# 535 ',)&)% .# ( /( ,") .,(-!),' )$("
,(-!),' )$(" )( (.,. - ( ,"3 $( .# &)1 , !, 5
+/ ($ - # .,(-!),' . 0&/ - , .# ( +/(.$4 3
$0$$(" 3 .# ),, -*)($(" +/(.$4.$)( ) !!$$ (. #$, -/&.- $( $-,$(" -)' )! .# #$"#5!, +/ (3 0&/ - ),
&)1 ,5!, +/ (3 /. &)15 ( ,"3 0&/ - -$( .# - )'
4 ,)- ).# .,(-!),' )$(" ( +/(.$4.$)( (& !/,.# ,
)'*, --$)( 3 ,/(5& (".# ()$(" )! 4 ,) 0&/ -
$(&&3 .# ()(4 ,) ) !!$$ (.- )! ( 535
&)% /- $(
.# $-, . )-$( .,(-!),' ( () 0$ 0,$& 5
& (".# (.,)*3 ()$(" -/# - /!!'( )$(" (.,)*3
()$(" -$&&3 , ')0 - )$(" , /((3 3 --$"($("
.# ) 1),- 1$.# .# ! 1 -. (/' , )! $.- .) .#)- )5
!!$$ (.- .#. )/, ')-. !, +/ (.&3
X X X X X X X X X X X X X X X X
O
O
O
O
O
O
O
O
X X X X X X X X X X X X X X X X
X X X X X X X X X X X X X X X X
O
O
O
O
O
O
O
O
X X X X X X X X X X X X X X X X
X X X X X X X X X X X X X X X X
O
O
O
O
O
O
O
O
X X X X X X X X X X X X X X X X
X X X X X X X X X X X X X X X X
O
O
O
O
O
O
O
O
X X X X X X X X X X X X X X X X
X X X X X X X X X X X X X X X X
O
O
O
O
O
O
O
O
X X X X X X X X X X X X X X X X
X X X X X X X X X X X X X X X X
O
O
O
O
O
O
O
O
X X X X X X X X X X X X X X X X
X X X X X X X X X X X X X X X X
O
O
O
O
O
O
O
O
X X X X X X X X X X X X X X X X
X X X X X X X X X X X X X X X X
O
O
O
O
O
O
O
O
X X X X X X X X X X X X X X X X
X = Luminance (Y)
O = Chrominance (Cb, Cr)
Fig. 4. Subsampling of the chrominance components (Cb, Cr) with
respect to the luminance component (Y).
© Hewlett-Packard Company 1995
MPEG decompression reverses the functional steps taken for
compression. There are six basic steps involved in
decompression:
1. The header is decoded. This gives information such as picture rate, bit rate, and image size.
<* +4:3) <&>8 94 7*):(* 9-* *=*(:9.43 9.2* 4+ 9-* 2489
+7*6:*39 45*7&9.43 8*6:*3(*8 #-* &551.(&9.43 4+ &1,47.9-2
*3-&3(*2*398 84+9<&7* 9:3.3, &3) 574/*(9*) -&7)<&7*
*3-&3(*2*398 <&8 .9*7&9*) :39.1 <* &99&.3*) 4:7 ,4&1 4+
'*.3, &'1* 94 )*(4257*88 &9 & 7&9* ,7*&9*7 9-&3 +7&2*88
;.& 84+9<&7*
2. The video data stream is Huffman or entropy decoded
from variable-length codes into fixed-length numbers. This
step includes run-length decoding of zeros.
In terms of video algorithms, we improved on the
Huffman decoder, the motion compensation, and the inverse
discrete cosine transform. A faster Huffman decoder based
on a hybrid of table lookup and tree-based decoding is
used. The lookup table sizes were chosen to reduce cache
misses. For motion compensation, we sped up the pixel
averaging operations.
3. Inverse quantization is performed on the numbers to
restore them to their original range.
4. An inverse discrete cosine transform is performed on the
8-by-8 blocks in each frame. This converts from the frequency
domain back to the original spatial domain. This gives the
actual pixel values for I-blocks, but only the differences for
each pixel for P-blocks and B-blocks.
5. Motion compensation is performed for P-blocks and
B-blocks. The differences calculated in step 4 are added to the
pixels in the reference block as determined by the motion
vector, for P-blocks, and to the average of the forward and
backward reference blocks, for B-blocks.
6. The picture is displayed by doing a color conversion from
YCbCr coordinates to RGB color coordinates and writing to
the frame buffer.
:7 5-.14845-> <&8 94 .2574;* 9-* &1,47.9-28 &3) 9:3* 9-*
84+9<&7* +.789 7*8479.3, 94 -&7)<&7* 8:55479 431> .+ 3*(*8A
8&7> $* 8*9 & ,4&1 4+ 94 +7&2*88 +47 84+9<&7* ;.)*4 )*(4257*88.43 '*(&:8* 9-.8 .8 9-* 7&9* &9 <-.(- 24A
9.43 &55*&78 82449- 7&9-*7 9-&3 /*70>
$* 89&79*) '> 2*&8:7.3, 9-* 5*7+472&3(* 4+ 9-* 84+9A
<&7* <* -&) 5:7(-&8*) #-.8 84+9<&7* .3.9.&11> 9440 9<4
8*(43)8 94 )*(4)* 43* +7&2* +7&2*8 43 &3 41)*7
A? 4)*1 <47089&9.43 #-.8 )*(4).3, <&8 +47 ;.)*4
431> &3) ).) 349 .3(1:)* &:).4 74+.1.3, .3).(&9*) 9-&9 9-*
.3;*78* ).8(7*9* (48.3* 97&38+472 89*5 9440 9-* 1&7,*89
(-:30 4+ 9-* *=*(:9.43 9.2* +4114<*) '> ).851&> 89*5 +4114<*) '> 249.43 (425*38&9.43 89*5 #-* )*(4).3, 4+
9-* -*&)*78 <&8 .38.,3.+.(&39
$.9- 9-.8 )&9& <* 8*9 4:9 94 459.2.?* *;*7> 89*5 .3 9-*
)*(4257*88.43 84+9<&7* +9*7 <* &551.*) &11 9-* &1A
,47.9-2 *3-&3(*2*398 &3) 84+9<&7* 9:3.3, <* 2*&8:7*)
9-* )*(4)* 84+9<&7* &,&.3 $-.1* <* -&) &(-.*;*) &3
47)*7 4+ 2&,3.9:)* .2574;*2*39 9-* 7&9* 4+ 94 +7&2*88
<&8 349 8:++.(.*39 94 2**9 4:7 ,4&1
For the inverse discrete cosine transform, we use a faster
Fourier transform, which significantly reduces the number of
multiplies for each two-dimensional 8-by-8 inverse discrete
cosine transform. In addition, we use the fact that the 8-by-8
inverse transform matrices are frequently sparse to further
reduce the multiplies and other operations required.
#-* &:).4 )*(4257*88.43 .8 &184 )43* .3 84+9<&7*
#-.8 &1,47.9-2 <&8 .2574;*) '> :8.3, & A54.39 ).8(7*9*
(48.3* 97&38+472 94 85**) :5 9-* 8:''&3) +.19*7.3,
In terms of software tuning, we “flattened” the code to reduce
the number of procedure calls and returns, and the
frequent building up and tearing down of contexts present
in the original code. We also did “strength reductions,”
like reducing multiplications to simpler operations such as
shift and add, or table lookup.
#-* 1&89 (41:23 4+ #&'1* 8-4<8 9-* 5*7(*39&,* 4+ *=*(:A
9.43 9.2* 85*39 .3 *&(- 4+ 9-* 8.= )*(4257*88.43 89*58
&+9*7 9-* &1,47.9-2 &3) 84+9<&7* 9:3.3, .2574;*2*398 <*7*
2&)* #-* +.789 9<4 (41:238 4+ #&'1* 8-4< 9-* 2.11.438 4+
.3897:(9.438 *=*(:9*) .3 *&(- 4+ 9-* 8.= )*(4257*88.43 89*58
&3) 9-* 5*7(*39 4+ 9-* 949&1 .3897:(9.438 *=*(:9*) 5&91*3,9- *&(- 89*5 7*57*8*398 #-* .35:9 ;.)*4 8*6:*3(* <&8
&3 A(4257*88*) (1.5 4+ & +449'&11 ,&2* #-* 949&1 9.2*
9&0*3 <&8 8*(43)8 43 &3 4)*1 A?
A!" <47089&9.43 <.9- '>9*8 4+ .3897:(9.43 (&(-*
&3) '>9*8 4+ )&9& (&(-*
Table I
Instructions and Time Spent in each MPEG Decompression Step
on an HP 9000 Model 735
Millions of
Instructions
*3(* <* 1440*) &9 5488.'1* 2:19.2*).& *3-&3(*2*398 94
9-* '&8.( A!" 574(*8847 &3) 49-*7 8>89*2A1*;*1 *3-&3(*A
2*398 9-&9 <4:1) 349 431> 85**) :5 )*(4).3, ':9
&184 '* ,*3*7&11> :8*+:1 +47 .2574;.3, 5*7+472&3(* .3 49-*7
(425:9&9.438 3 &)).9.43 &3> (-.5 *3-&3(*2*398 <* &))*)
(4:1) 349 &);*78*1> .25&(9 9-* )*8.,3 8(-*):1* (4251*=.9>
(>(1* 9.2* &3) (-.5 8.?* 4+ 9-* A!" 574(*8847 <* <*7*
9&7,*9.3, 9-* <-.(- <&8 &17*&)> )**5 .394 .98
.251*2*39&9.43 5-&8* &9 9-* 9.2* #-* .8 )*A
8(7.'*) .3 )*9&.1 .3 9-* &79.(1* 43 5&,*
Header decode
$* &5574&(-*) 9-.8 574'1*2 '> 89:)>.3, 9-* ).897.':9.43 4+
45*7&9.438 *=*(:9*) '> 9-* 84+9<&7* )*(4)*7 #-*3
© Hewlett-Packard Company 1995
Path Length
(%)
Time (%)
Display
Total
Huffman decode
Inverse quantization
Inverse discrete
cosine transform
Motion
compensation
April 1995 Hewlett-Packard Journal
The largest slice of execution time and the largest
chunk of instructions executed were still the inverse
discrete cosine transform. We studied the frequencies of generic
operations in this group and attempted to execute
them faster. This resulted in new PA-RISC processor instructions
for accelerating multimedia software.
The new processor multimedia instructions implemented in
the PA 7100LC processor allow simple arithmetic operations
to be executed in parallel on subword data in the standard
integer data path. In particular, the integer ALU is partitioned
so that it can execute a pair of arithmetic operations in
a single cycle with a single instruction. The arithmetic operations
accelerated in this way are add, subtract, average, shift
left and add, and shift right and add. The latter two operations
are effective in implementing multiplication by
constants.
PA-RISC Multimedia Extensions 1.0. '$ 9
.0-"$11-0 "'(. "-,2 (,1 1-+$ (,1203"2(-,1 2' 2 -.$0 2$ (,#$9
.$,#$,2*7 ,# (, . 0 **$* -, 25- 9!(2 # 2 %($*#1 5(2'(,
9!(2 0$&(12$0 '$1$ -.$0 2(-,1 0$ (,#$.$,#$,2 (, 2' 2 !(21
" 00($# -0 1'(%2$# -32 -% -,$ -% 2'$ %($*#1 ,$4$0 %%$"21 2'$
0$13*2 (, 2'$ -2'$0 %($*# '$1$ -.$0 2(-,1 -""30 (, . 0 **$* (,
2' 2 1(,&*$ (,1203"2(-, "-+.32$1 !-2' 9!(2 %($*#1 -% 2'$
0$13*2 !*$ 13++ 0(8$1 2'$1$ (,1203"2(-,1
Table II
PA-RISC Multimedia Instructions in PA 7100LC
ra contains a1; a2
rb contains b1; b2
rt contains t1; t2
Instruction
Parallel Operation
HADD ra,rb,rt
t1 = (a1+b1) mod 2^{16};
t2 = (a2+b2) mod 2^{16};
HADD.ss ra,rb,rt
t1 = IF (a1+b1) > (2^{15}–1) THEN (2^{15}–1)
ELSEIF (a1+b1) < –2^{15} THEN (–2^{15})
ELSE (a1+b1);
t2 = IF (a2+b2) > (2^{15}–1) THEN (2^{15}–1)
ELSEIF (a2+b2) < –2^{15} THEN (–2^{15})
ELSE (a2+b2);
HADD.us ra,rb,rt
t1 = IF (a1+b1) > (2^{16}–1) THEN (2^{16}–1)
ELSEIF (a1+b1) < 0 THEN 0
ELSE (a1+b1);
t2 = IF (a2+b2) > (2^{16}–1) THEN (2^{16}–1)
ELSEIF (a2+b2) < 0 THEN 0
ELSE (a2+b2);
HSUB ra,rb,rt
t1 = (a1–b1) mod 2^{16};
t2 = (a2–b2) mod 2^{16};
HSUB.ss ra,rb,rt
t1 = IF (a1–b1) > (2^{15}–1) THEN (2^{15}–1)
ELSEIF (a1–b1) < –2^{15} THEN (–2^{15})
ELSE (a1–b1);
t2 = IF (a2–b2) > (2^{15}–1) THEN (2^{15}–1)
ELSEIF (a2–b2) < –2^{15} THEN (–2^{15})
ELSE (a2–b2);
HSUB.us ra,rb,rt
t1 = IF (a1–b1) > (2^{16}–1) THEN (2^{16}–1)
ELSEIF (a1–b1) < 0 THEN 0
ELSE (a1–b1);
t2 = IF (a2–b2) > (2^{16}–1) THEN (2^{16}–1)
ELSEIF (a2–b2) < 0 THEN 0
ELSE (a2–b2);
HAVE ra,rb,rt
t1 = (a1+b1)/2;
t2 = (a2+b2)/2;
HSLkADD ra,k,rb,rt
t1 = (a1 << k) + b1;
t2 = (a2 << k) + b2;
(for k = 1, 2, or 3)
HSRkADD ra,k,rb,rt
t1 = (a1 >> k) + b1;
t2 = (a2 >> k) + b2;
(for k = 1, 2, or 3)
HADD does two parallel 16-bit additions on the left and the
right halves of registers ra and rb, placing the two 16-bit
results into the left and right halves of register rt.
HSUB does two parallel 16-bit subtractions on the left and
right halves of registers ra and rb, placing the two 16-bit
results into the left and right halves of register rt.
Both HADD and HSUB perform modulo arithmetic (modulus 2^{16}),
that is, the result wraps around from the largest number
back to the smallest number and vice versa. This is the usual
mode of operation of twos complement adders when overflow
is ignored.
HADD and HSUB also have two saturation arithmetic options.
With the signed saturation option, HADD.ss, both operands
and the result are considered signed 16-bit integers. If the
result cannot be represented as a signed 16-bit integer, it is
clipped to the largest positive value, if positive overflow
occurs, or it is clipped to the smallest negative value,
if negative overflow occurs.
With the unsigned saturation option, HADD.us, the first operand,
ra, is considered an unsigned 16-bit integer, the second
operand, rb, is considered a signed 16-bit integer, and the
result in rt is considered an unsigned 16-bit integer. If the
result cannot be represented as an unsigned 16-bit integer, it
is clipped to the largest unsigned value, if positive
overflow occurs, or it is clipped to the smallest unsigned
value, if negative overflow occurs.
The signed saturation and unsigned saturation options for
parallel halfword subtraction are defined similarly.
HAVE, or halfword average, gives the average of each pair of
halfwords in ra and rb. It takes the sum of parallel halfwords
and does a right shift of one bit before storing each 16-bit
result into rt. During the one-bit right shift, the carry is
April 1995 Hewlett-Packard Journal
ss = signed saturation option
us = unsigned saturation
shifted in on the left, and unbiased rounding* is performed on
the least-significant bit on the right. Because the carry is
shifted in, no overflow can occur in the HAVE instruction.
HSLkADD, or halfword shift left and add, allows one operand
to be shifted left by k bits, where k is 1, 2, or 3, before
being added to the other operand.
HSRkADD, or halfword shift right and add, allows one operand
to be shifted right by k bits, where k is 1, 2, or 3, before
being added to the other operand.
Both HSLkADD and HSRkADD use signed saturation.
* Unbiased rounding means that the net difference between the true averages and the averages
obtained after unbiased rounding is zero if the results are equally distributed in the result range.
© Hewlett-Packard Company 1995
Saturation Arithmetic. In saturation arithmetic a result is said to
have a positive overflow if it is larger than the largest value
in the defined range of the result. It is said to have a negative
overflow if it is smaller than the smallest value in the
defined range of the result. If the saturation option is used
for the HADD and HSUB instructions, the result is clipped to
the maximum value in its defined range if positive overflow
occurs and to the minimum value in its defined range if negative
overflow occurs. This further speeds up the processing
because it replaces using about ten instructions to check for
positive and negative overflows and performs the desired
clipping of the result for a pair of operations in one
instruction.
Other PA 7100LC processor enhancements streamline the
memory-to-I/O path. By having the memory controller and
the I/O interface controller integrated in the PA 7100LC chip,
overhead in the memory-to-frame-buffer bandwidth is reduced.
Overhead in the processor-to-graphics-controller-chip
path is also reduced for both control and data.
Saturation arithmetic is highly desirable in dealing with pixel
values, which often represent hues or color intensities. It is
undesirable to perform the normal modulo arithmetic in
which overflows wrap around from the largest value to the
smallest value and vice versa. For example, in 8-bit pixels, if
0 represents black and 255 represents white, a result of 256
should not change a white pixel into a black one, as would
occur with modulo arithmetic. In saturation arithmetic, a
result of 256 would be clipped to 255.
Table III
Instructions and Time Spent in each MPEG Decompression Step
on a Model 712 Workstation
Since the PA 7100LC processor has two integer ALUs, we
essentially have a parallelism of four halfword operations
per cycle. This gives a speedup of four times, in places
where the superscalar ALUs can be used in parallel. Because
of the built-in saturation arithmetic option, speedup of certain
pieces of code is even greater.
The second longest functional step (see Table I) in MPEG
decompression was the display step. Here, we leveraged the
graphics subsystem to implement the color conversion step
together with the color recovery already being done in the
graphics chip.[7] Color conversion converts between color
representations in the YCbCr color space and the RGB color
space. Color recovery reproduces 24-bit RGB color that has
been color compressed into 8 bits before being displayed.
Color compression allows the use of 8-bit frame buffers in
low-cost workstations to achieve almost the color dynamics
of 24-bit frame buffers. This leveraging of low-level pixel
manipulations close to the frame buffer between the graphics
and video streams also contributed significantly to the
attainment of real-time MPEG decompression. Color recovery
and the graphics chip are described in the articles on pages
51 and 43, respectively.
© Hewlett-Packard Company 1995
Millions of
Instructions
Path Length
(%)
Time (%)
Header decode
0.60
0.2
0.3
Huffman decode
55.0
16.1
14.5
8.9
2.6
4.5
138.5
40.6
34.4
Motion
compensation
74.8
21.9
25.6
Display
63.0
18.5
20.7
340.8
100.0
100.0
Inverse quantization
Inverse discrete
cosine transform
Total
The Model 712 executes consistently fewer instructions than
the Model 735 for the same MPEG decompression of the
same video clip. It is also faster in MPEG decompression
even though it operates at only 60% of the 99-MHz rate of
the high-end Model 735 and has only one eighth of the
cache size. This shows the performance benefits from the
path length reduction enabled by the PA-RISC processor and
system enhancements for multimedia acceleration.
The performance of the PA-RISC architectural enhancements
and the leveraging of the graphics subsystem for video
decompression can be seen in Fig. 5. This data is for a
33.1
35
30
26
25
Frames/s
Effect on MPEG Decoding. These parallel subword arithmetic
operations significantly speed up several critical parts
of the MPEG decoder program, especially in the inverse discrete
cosine transform and motion compensation steps. More
than half of the instructions executed for the inverse transform
step are these parallel subword arithmetic instructions.
Their implementation does not impact the processor's cycle
time, and adds less than 0.2% of silicon area to the PA
7100LC processor chip. Actually, the area used was mostly
empty space around the ALU, so that these multimedia
enhancements can be said to have contributed to more efficient
area utilization, rather than adding incremental chip
area. See “Overview of the Implementation of the Multimedia
Enhancements” on page 66.
Table III shows the same information as Table I but for the
low-end Model 712 workstation, which uses the
multimedia-enhanced PA 7100LC processor and the graphics chip
mentioned above.
18.72
20
15
10
9.91
5
0
Model 715
50 MHz
Model 735
99 MHz
Model 712
60 MHz
Model 712
80 MHz
Fig. 5. Maximum MPEG decode frame rates for different models of
HP 9000 Series 700 workstations. These rates are for a 352-by-240-pixel
clip that was encoded at 30 frames/s.
April 1995 Hewlett-Packard Journal
One goal in adding the multimedia instructions was to minimize the amount of new
circuits to be added to the existing ALUs and to minimize the impact on the rest of
the CPU. This goal was accomplished. The only circuit changes to the CPU were
in the ALU data path and decoder circuits. These instructions reuse most of the
existing functionality and very small modifications and additions were required to
implement them.
All of the new instructions implemented require two 16-bit adds or subtracts to be
done in parallel. The existing ALU adder was modified to provide this functionality.
These instructions required that the existing 32-bit adder be conditionally split into
two 16-bit halves without sacrificing the performance of the 32-bit add. Conceptually this is equivalent to blocking the carry from bit 16 to bit 15 in a ripple-carry
adder. To accomplish this, we made the following modifications.
The ALU adder is similar to a carry lookahead adder. The first stage of the adder
calculates a carry generate and a carry propagate signal for each single bit in the
adder. In this case, 32 single-bit generate and 32 single-bit propagate signals are
calculated. These single-bit carry generate and carry propagate signals are used
in subsequent stages of the carry chain to calculate carry generate and carry
propagate signals for groups of bits.
The 32-bit adder was divided into two 16-bit halves between bits 15 and 16 by
providing alternate signals for the carry generate and carry propagate signals
from bit 16 (Fig. 1). The new generate and propagate signals from bit 16 are
created with a two-input multiplexer. When a 32-bit addition or subtraction is being
performed, the multiplexer selects the original generate and propagate signals
to be passed onto the next stage of the carry chain. When 16-bit addition or
subtraction is being performed the multiplexer selects the value for generate and
propagate from the second input which is false (logical 0) for additions and true
(logical 1) for subtractions.
Prop[31]
Gen[31]
Gen[30]
Prop[30]
Gen[29]
Prop[29]
Prop[16]
Gen[16]
Gen[15]
Prop[15]
Gen[1]
Prop[1]
Gen[0]
Prop[0]
Carry
Lookahead
Logic
B[31]
A[31]
B[30]
A[30]
B[29]
A[29]
B[16]
A[16]
B[15]
A[15]
B[1]
A[1]
B[0]
LSB
A[0]
MSB
Carry Chain Inputs
A[16]
A[16]
Old Gen[16]
B[16]
A[16]
B[16]
Old Prop[16]
Old Gen[16]
B[16]
16-Bit Carry-in
(0 for Add,
1 for Subtract)
A[16]
B[16]
16-Bit Carry-in
(0 for Add,
1 for Subtract)
Before Halfword Modification
to Block-Carry Propagation
MUX
Gen[16]
Old Prop[16]
MUX
Prop[16]
After Halfword Modification
Fig. 1. Modifications to the carry lookahead adder to accommodate the halfword instructions.
Normal Data
16
ALU and
Preshifter
Results
Signed or Unsigned
Positive and Negative
Overflow Logic
16
Saturated Data
16
Force Low
Force High
16
Normal Data
16
16
Saturated Data
16
Force Low
Fig. 2. Saturation logic. There is one of these circuits for each halfword.
propagating from bit 16 to bit 15, even if this generate and propagate signal is not
used directly to calculate the carry signal (as is the case in this adder). The generate and propagate signals can also be forced to be true for instructions requiring
halfword subtraction. This will force a carry into the more significant halfword of the
adder by generating a carry from bit 16 into bit 15. This technique is used along
with the ones complement of the operand to be subtracted to perform subtraction
as twos complement addition.
The original carry generate and propagate signals from bit 16 are still generated to
calculate overflows from the less significant halfword addition. This overflow is
used by the saturation logic, which can be invoked by some of these instructions.
Saturation requires groups of bits of the result to be forced to states of true or
false, or passed unchanged. This is accomplished with an AND-OR gate (Fig. 2).
The AND function can force the output of the gate to be false and the OR function
can force the output of the gate to be true. Thus, the output is either forced high,
forced low, or forced neither high nor low. It is never simultaneously forced high
and low. The key is to determine when to force the result to a saturated value.
The saturation circuit is added at the end of the ALU’s data path after the result
selection multiplexer selects one of the results from the adder after it performs
additions, subtractions, or logical operations such as bitwise AND, OR, or XOR
(Fig. 3). The saturation circuit does not impact the critical speed paths of the ALU
because it is downstream from the point where the cache data address is driven
from the adder and where the test condition logic (i.e., logic for conditional branch
instructions) obtains the results from which to calculate a test condition.
C[29]
C[30]
C[31]
C[0]
C[1]
C[2]
C[15]
Carry Chain Outputs
C[16]
The new generate and propagate signals can be forced to be false for instructions
requiring halfword addition. This stops the carry from being generated by bit 16 or
Force High
16
If signed saturation is selected, the ALU will force any 16-bit result that is larger
than 0x7fff to 0x7fff ($2^{15}-1$) and any 16-bit result that is smaller than 0x8000 to
0x8000 ($-2^{15}$). These conditions represent positive and negative overflow of
signed numbers. Positive and negative overflow can be detected by examining the
sign bit (the MSB) of each operand and the result of the add. If both operands are
positive and the result is negative then a positive overflow has occurred and the
result in this case is saturated by forcing the most-significant bit to a logical 0 and
the rest of the bits to a logical 1. If both operands are negative and the result is
positive then a negative overflow has occurred and the result in this case is saturated by forcing the most-significant bit to a logical 1 and the rest of the bits to a
logical 0. Unsigned saturation is implemented in a similar way.
The average instruction, HAVE, requires manipulating the result after the addition
is finished. Before the implementation of the halfword instructions the ALU selected between the results of a bitwise AND, a bitwise OR, a bitwise XOR, or the
sum of the two input operands. The halfword average instruction adds an additional choice. The average result is the sum of the two input operands shifted right
one bit position with a carry out of the most-significant bit (MSB) becoming the
MSB of the result. To perform rounding of the result, the least-significant bit (LSB)
of the result is replaced by an OR of the two least-significant bits before shifting
right one bit.
The shift right and add and the shift left and add functions were added by modifying
the x-bus preshifter in the operand selection logic of the ALU. The original ALU was
capable of shifting 32-bit inputs left by zero, one, two, or three bits. To implement
the 16-bit shift left and add instructions, the left-shift circuits had to be broken at
 Hewlett-Packard Company 1995
General
Register
General
Berkeley Software without Hardware and Software Enhancements
Register
30
25
Preshifter
Overflow
Frames/s
Operand 1 Selection and
One’s Complement
Operand 1 Selection
and Preshifter
32-Bit Partitioned
Adder
16-Bit Adder
+
16-Bit Adder
+
20
HP Decoder
with Software
Enhancements, but
without Hardware Enhancements
15
10
9.7
10.9
Model 720 50 MHz
Adder
Overflow
Saturation Logic
General
Register
Fig. 3. Flow of halfword instructions showing the location of the saturation logic in
relation to the ALU.
the halfword boundary. This was done by ANDing the bits shifted from the least-significant halfword to the most-significant halfword with a control signal that indicates when a 32-bit shift is being done. The 16-bit shift right and add instructions
were implemented by adding the ability to shift one, two, or three bits right. This
shift is always broken at the halfword boundary.
One challenging aspect of implementing the 16-bit shift left and add instructions
was detecting when the results of shifting an operand left by one, two, or three bits
causes a positive or negative overflow. A positive overflow occurs when the unshifted operand is positive and a logical one is shifted out of the left, or when the
result of the shift is negative. A negative overflow occurs when the unshifted operand is negative and a logical zero bit is shifted out of the left, or when the result of
the shift is positive. These overflow conditions are combined with the overflows
calculated by the adder and used to saturate the final result. The final result is
saturated if either the left shift or the adder causes an overflow.
The result of selecting instructions that can provide the most useful functionality
while costing the least to implement was a relatively small increase in the area of
the ALU. About 15% of the ALU’s area is devoted to halfword instructions. Since
the ALU’s circuits were the only ones modified on the processor chip, only about
0.2% of the total processor’s chip area is devoted to halfword instructions.
5)$%. #+)/ 3(!3 6!2 #.,/1%22%$ !3 &1!,%22 (% .$%+
!-$ .$%+ !1% "!2%$ .- 3(% /1.#%22.1 (%
.$%+ )2 "!2%$ .- 3(% /1.#%22.1 6()#( )2 !
$%1)5!3)5% .& 3(% (% #.-3!)-2 3(% ,4+3):
,%$)! %-(!-#%,%-32 !-$ 2823%, )-3%'1!3).- &%!341%2 !-$ )2
$%2#1)"%$ )- 3(% !13)#+% .- /!'% (% .+$%1 ()'(:%-$
.$%+ 14--)-' !3 9 !#()%5%2 &1!,%22 6()+%
3(% -%6%1 %-318:+%5%+ .$%+ !#()%5%2 &1!,%22 !3 9 !-$ &1!,%22 !3 9 (%2% &1!,% $%#.,/1%2:
2).- 1!3%2 !1% 04.3%$ &.1 5)$%. .-+8 -. !4$). 6)3(
 Hewlett-Packard Company 1995
11.1
5
Berkeley
Result Selection MUX
26.2
HP Decoder with
Software and
Hardware
Enhancements
0
Cache
Address
Adder
Overflow
Berkeley Software
Model 712 60 MHz
HP
.,/!1)2.- "%36%%- 3(% /%1&.1,!-#% .& 3(% %-(!-#%$
%1*%+%8 $%#.$%1 !-$ 3(% $%#.$%1 6)3(.43 !4$).
-. #.-231!)-32 .- (.6 &!23 3(% $%#.$)-' #!- /1.#%%$ .3(%1 6.1$2 3(% $%#.$)-' 1!3% )2 -.3 #.-231!)-%$ "8 3(% 1!3%
!3 6()#( 3(% 231%!, (!2 "%%- #.,/1%22%$ %-#%
!+3(.4'( 3(% 5)$%. #+)/ 42%$ 6!2 #.,/1%22%$ !3 &1!,%22 3(% :9 .$%+ #!- $%#.$% )3 &!23%1 3(!- &1!,%22 )- 4-#.-231!)-%$ ,.$% ()2 ),/+)%2 3(!3 3(%1% )2
2.,% /1.#%22.1 "!-$6)$3( +%&3 !&3%1 !#()%5)-' 1%!+:3),% 2.&3:
6!1% 5)$%. $%#.$)-'
In the video player product in MPower, frames are
skipped if the decoder cannot keep up with the desired real-time
rate. This results in a lower effective frame rate, since
skipped frames are not counted even though execution time
may have been used for partial decoding of a skipped frame.
)' 2(.62 ! #.,/!1)2.- "%36%%- 3(% %-(!-#%$ %1*%+%8
2.&36!1% $%#.$%1 !-$ 3(% 2.&36!1% $%#.$%1
14--)-' .- 3(% .+$%1 .$%+ 6)3( -. (!1$6!1%
,4+3),%$)! %-(!-#%,%-32 !-$ 3(% -%6%1 .$%+ 6.1*:
23!3).- 6)3( (!1$6!1% ,4+3),%$)! %-(!-#%,%-32 (%
&.413( #.+4,- )- )' )++4231!3%2 3(% /%1&.1,!-#% ."3!)-:
!"+% 6)3( 28-%1')23)# 2.&36!1% !-$ (!1$6!1% %-(!-#%,%-32
- 3(% .$%+ 3(% %1*%+%8 !-$ 2.&36!1% $%#.$%12
(!5% #.,/!1!"+% /%1&.1,!-#% .1 3(% .$%+ 3(% /%1:
&.1,!-#% .& 3(% $%#.$%1 6!2 3),%2 '1%!3%1 3(!- 3(%
%1*%+%8 $%#.$%1 "%#!42% .& 3(% 28-%1')23)# #.4/+)-' .& 3(%
!+'.1)3(,2 !-$ 2.&36!1% ./3),)9%$ 6)3( 3(% : ,4+3):
,%$)! )-2314#3).-2 !-$ 3(% 2823%,:+%5%+ %-(!-#%,%-32 )- 3(%
.$%+ )' 2(.62 3(% /%1&.1,!-#% 6(%- !4$). .& 5!1).42
&)$%+)38 +%5%+2 )2 !+2. $%#.,/1%22%$ "8 2.&36!1% 14--)-' .3(% '%-%1!+:/41/.2% /1.#%22.1 (% ()'(%23:&)$%+:
)38 !4$). )2 23%1%. 6)3( -. $%#),!3).- ()2 ,%!-2 3(!3 %5%18
!4$). 2!,/+% #.,%2 !2 ! /!)1 .& +%&3 !-$ 1)'(3 #(!--%+ 5!+:
4%2 !-$ %5%18 2!,/+% )2 42%$ !+& $%#),!3).- ,%!-2 3(!3
.-% .43 .& %5%18 36. !4$). 2!,/+%2 )2 42%$ $%#),!3).,%!-2 3(!3 .-+8 .-% .43 .& %5%18 &.41 !4$). 2!,/+%2 )2
42%$ .-. ,%!-2 3(!3 %5%18 !4$). 2!,/+% )2 ! 2)-'+% 5!+4%
#(!--%+ 1!3(%1 3(!- ! /!)1 .& 5!+4%2
()+% 2.&36!1% $%#.,/1%22).- .& !4$). $%'1!$%2 3(%
/%1&.1,!-#% )- 3%1,2 .& &1!,%2 $%#.$%$ /%1 2%#.-$ 3(% :"!2%$ 6.1*23!3).-2 !#()%5%$ 1!3%2 .& &1!,%22 !3
9 &1!,%22 !3 9 !-$ &1!,%22 !3
/1)+ %6+%33:!#*!1$ .41-!+
35
Frames/s
25
10
5
0
ÎÎ
ÎÎ
ÎÎ
ÎÎ
ÎÎ
22.5
20
15
ÎÎ
ÎÎ
ÎÎ
ÎÎ
ÎÎ
ÎÎ
ÎÎ
29.8
30
18.2
15.1
Model 715
60 MHz
Stereo
27.3
24.2
Model 712
80 MHz
Mono with 1/2 Decimation
ÎÎ
ÎÎ
ÎÎ
ÎÎ
ÎÎ
ÎÎ
ÎÎ
ÎÎ
29.3 30.0
27.4
Model 715
100 MHz
No Audio
Fig. 7. Performance when MPEG video and MPEG audio are decoded
in software.
100 MHz even with the highest-fidelity 44.1-kHz stereo
16-bit linear audio format with no decimation. With further
enhancements of audio decoding and audio-video synchronization,
we should be able to do even better.
We wanted a software approach to MPEG decoding because
we felt that if video is to be useful it has to be pervasive,
and to be pervasive, it should exist at the lowest incremental
cost on all platforms. With a software video decoder, there is
essentially no additional cost. In addition, the evolving standards
and improving algorithms pointed to a flexible solution,
like software running on a general-purpose processor.
Using special-purpose chips designed for MPEG decoding,
or even for JPEG, MPEG, and H.261 compression and decompression,
would not allow one to take advantage of improved
algorithms and adapt to evolving standards without
buying and installing new hardware.
Furthermore, since the performance of general-purpose
microprocessors continues to improve with each new generation,
we wanted to be able to leverage these improvements
for multimedia computations such as video decompression.
This approach also allows us to focus hardware
design efforts on improving the performance of the general-purpose
processor and system without having to replicate
performance efforts in each special-purpose subsystem, such
as the graphics and video subsystems. The PA-RISC multimedia
instructions are also useful for graphics, image, and
audio computations, or any computations requiring arithmetic
on a lot of numbers with precision less than 16 bits.
The net result is that we achieve real-time MPEG decoding
of video streams at 30 frames/s with a software decoder.
This was achieved by a synergistic combination of algorithm
enhancements, software tuning, PA-RISC processor multimedia
enhancements, combining video and graphics support
for color conversions and color compression, and system
tuning. The PA-RISC multimedia enhancements allow parallel
processing of pixels in the standard integer data path at an
insignificant addition to the silicon area. The total area used
is less than 0.2% of the PA 7100LC processor chip with no
impact on the cycle time or the control complexity.
The real-time software MPEG decoding rate of the final
video player product exceeds our original goal of 10 to 15
April 1995 Hewlett-Packard Journal
frames/s for a software-based MPEG video decoder. It is also
significant that MPEG video decoding at 30 frames/s is
achieved by an entry-level rather than a high-end workstation.
This is in the context of a full-function video player
on the HP MPower 2.0 product. With MPEG audio decoding
(also done by software), the frame rate is usually above 15
frames/s, even for the low-end Model 712/60 workstation,
and around 24 frames/s for the Model 712/80 workstation.
We expect to see continuous improvement in the MPEG
decoding rate as the performance of the general-purpose
processors increases. With PA-RISC processors, there has
been roughly a doubling of performance every 18 to 24
months. This would imply that larger frame sizes, multiple
video streams, or MPEG-2 streams may be decoded in the
future by such multimedia-enhanced general-purpose
processors.
The success of the MPEG software decoder performance
tuning depended on close, interdivisional teamwork distributed
across four HP sites at Chelmsford, Cupertino, Palo Alto,
and Fort Collins. We would like to thank all the other members
of the multimedia architecture team, especially Vasudev
Bhaskaran, Peter Kaczowka, Behzad Razban, Pat McElhatton,
Larry Thayer, Konstantine Konstantinides, and Larry McMahan.
We would also like to thank the HP MPower team, the
PA-RISC extensions team, especially Michael Mahon, the PA
7100LC team, especially Mark Forsyth and Charlie Kohlhardt,
the Model 712 team, the performance lab, especially the late
Tian Wang, and the compiler lab, especially Pat Kwan, for
their support.
1. Hewlett-Packard Journal, Vol. 45, no. 2, April 1994.
2. R. Lee, "Precision Architecture," IEEE Computer, Vol. 22, no. 1,
January 1989, pp. 78-91.
3. R. Lee, M. Mahon, and D. Morris, "Pathlength Reduction Features in
the PA-RISC Architecture," Proceedings of IEEE Compcon, February
1992, pp. 129-135.
4. L. McMahan and R. Lee, "Pathlengths of SPEC Benchmarks for
PA-RISC, MIPS and SPARC," Proceedings of IEEE Compcon,
February 1993, pp. 481-490.
5. Digital Compression and Coding of Continuous-Tone Still Images,
CCITT Rec. T.81, ISO/IEC 10918-1, July 1992.
6. P. Knebel, et al., "HP's PA7100LC: A Low-Cost Superscalar PA-RISC
Processor," Proceedings of IEEE Compcon, February 1993, pp.
441-447.
7. S. Undy, et al., "A VLSI Chip-Set for Graphics and Multimedia
Workstations," IEEE Micro, Vol. 14, no. 2, April 1994, pp. 10-22.
8. L. Gwennap, "New PA-RISC Processor Decodes MPEG Video,"
Microprocessor Report, Vol. 8, no. 1, January 1994, pp. 16-17.
9. Coding of Moving Pictures and Associated Audio for Digital Storage
Media up to 1.5 Mbit/s, ISO/IEC JTC1 CD 11172, 1991.
10. K. Patel, B. Smith, and L. Rowe, "Performance of a Software
MPEG Video Decoder," Proceedings of First ACM International Conference
on Multimedia, August 1993, pp. 75-82.
11. D. LeGall, "MPEG - A Video Compression Standard for Multimedia
Applications," Communications of the ACM, April 1991, Vol.
34, no. 4, pp. 46-58.
12. K. Konstantinides, "Fast Subband Filtering in MPEG Audio Coding,"
IEEE Signal Processing Letters, Vol. 1, no. 2, February 1994, pp.
26-28.
 Hewlett-Packard Company 1995
Download