! " # % " & ' ( % ) $ & # $ * & ! " #$ " $ $ $ + # + % " 0 # , # - 1 & & , + +/ " . 2 & %" + 2 '# % ( ' " -$ )* + # ' , 3 4 2 – GXGXXG 5 – PRGRP 5 + 6 "9 2 # 7 + 8 2 & %" %" '# % # 2 * 2 5 5 3 ' :!; !< CXX(XX)CXXXXXXXXXXXXHXXXH< 42 2 4 & 857 # %" ( # $ # " %" '" $ $ " % ).+ # ' , " '" $ $ $ " %( 2 # 2 , / 0 $ 0, 857 # 1 $ $ 2 "9 # * / %( 2 # "9 4 # * 2 7 # 0# α52 9< # β5 2 2 . : * 52 % ! 2 = < # 52 4 α5 ; # % 0, ") 2 3 , " $ $ "+ / 4 /0 0, " % / % 504 / # $ " % # ' , 9 %" " % "( %" ( ' , ! 2 . # ' , - & " # # # # 2 # 7 " Lehninger, Principles of Biochemistry, 3rd edition ! & '& $% # # 67. !> ? 7 . 5 # ,$ - 3 , , %2 2 . & / " " %& %!(! " ) ! / " %& " / " %& " / " %& " / " %& " / " %& " 8 1 , ) "+ $ # " " 3 , , # 3 9 % "$ # " " % " * %!@ - .* A 5%! ).: : ; + * , ) "+ 3 $ # " " , # 3 , 9 % "$ # " " " %1 * 1 * % 0* 9 % , " 3 " " = = = = := 6 = = = :6 % # % " -$ " , , % , ,$ " < " " # %!@ - .* A 5%! ).: : ; + "= 05 * %!@ - .* A 5%! ).: : ; + 80 # 0* # " $ $ 0* 8α0 % 0* 8α " $ $ " < < 3 * 8α 7 # 2 2 9# 7 # 3 2 # 7 1 $ , $ " , http://www.expasy.org/prosite/ , =14; $ 14; " 1 $ $ , " 14.> 4 VAV GRB2 NCK1 CRK %$ $ " % " '" $ $ SGIRIIVVALYDYEAIHHEDLSFQKGDQMVVLEESGEWWKARSLATRKEGYIPSNYVARV DSLETEEWFFKGISRKDAERQLLAPGNMLGSFMIRDSETTKGSYSLSVRDYDPRQGDTVK HYKIRTLDNGGFYISPRSTFSTLQELVDHYKKGNDGLCQKLSVPCMSSKPQKPWEKDAWE IPRESLKLEKKLGAGQFGEVWMATYNKHTKVAVKTMKPGSMSVEAFLAEANVMKTLQHDK LVKLHAVVTKEPIYIITEFMAKGSLLDFLKSDEGSKQPLPKLIDFSAQIAEGMAFIEQRN YIHRDLRAANILVSASLVCKIADFGLARVIEDNEYTAREGAKFPIKWTAPEAINFGSFTI KSDVWSFGILLMEIVTYGRIPYPGMSNPEVIRALERGYRMPRPENCPEELYNIMMRCWKN RPEERPTFEYIQSVLDDFYTATESQEEIP & 1 ' " & & )! 
4 " & 5B # " B +, -+ ./ 0 0 1 .+ 5> # ' " , " , " " , & & 1 $ $ # " ( 7 &4 $ &$ Superfamilies Families (2) Folds: %& 9 " " 3 % , 1 " $ $ & # " < $ ) 8 All alpha proteins All beta proteins Alpha and beta proteins (α/β) Mainly parallel beta sheets (beta-alpha-beta units) Alpha and beta proteins (α+β) Mainly antiparallel beta sheets (segregated alpha and beta regions) Multi-domain proteins (alpha and beta) Folds consisting of two or more domains belonging to different classes Membrane and cell surface proteins and peptides Does not include proteins in the immune system Small proteins Usually dominated by metal ligand, heme, and/or disulfide bridges Folds 23 &4 5 (6 53 %$ 3 %7 %7 Classes "" % %& http://scop.mrc-lmb.cam.ac.uk/scop/ http://protein.toulouse.inra.fr/prodom.html 1 $ $ "" % " ' < , "< " "3, " " $ $ ", " # 3 ' Proteins are defined as having a common fold if they have the same major secondary structures in the same arrangement and with the same topological connections (3) Superfamily: & Proteins that have low sequence identities, but whose structural and functional features suggest that a common evolutionary origin is probable, are placed together in superfamilies (4) Family: ' Proteins clustered together into families are clearly evolutionarily related. Generally, this means that pairwise residue identities between the proteins are 30% and greater '& "2 % , 3 ? 1 0 $ " "$ " 01 $ $ ) % ( http://www.biochem.ucl.ac.uk/bsm/cath/cath.html 4+ 0 < ( ! 0 < ( 2 # 2 C / 8* ! http://www.biochem.ucl.ac.uk/bsm/cath/ 0 < ( !!" # ! http://www.biochem.ucl.ac.uk/bsm/cath/cath.html 40 < ( 40 "" . ! 2 4 2 # * ≥ @E E 8 ?ED 3 3 & ≥ @E E<?ED < 8 3 & # 2 ≥ %> D < ? E D 3 3 & 11 &( 2 2 -6 1 F - A@A. ≥ !E D 42 2 2 & "" *( ' , " " .( ' "" ;( / , @ " " " 5( 3 1 ' 1 $ $ " # 40 , $ 40 1$ .0 ' " 1 3 , 0%$ " " " 1 " " A 5 # 4C& 404 & ' '* % 5 ' # #2 #2 6 " ,' ) 4&+ " ' , " 0 %$ " B0" ' " " : " " 6 2 " < "9 # " # $ % ! %& %' % (( ! )" ! ( ((! 
Database of 498 SCOP “Folds” or “Superfamilies” The overall pair-wise comparisons of 498 folds lead to a 498 × 498 matrix of similarity scores $S_{ij}$, where $S_{ij}$ is the alignment score between the $i$th and $j$th folds. An appropriate method for handling such data matrices as a whole is metric matrix distance geometry. The similarity score matrix $[S_{ij}]$ is first converted to a distance matrix $[D_{ij}]$ by using $D_{ij} = S_{\max} - S_{ij}$, where $S_{\max}$ is the maximum similarity score among all pairs of folds. : = 7 The distance matrix is then transformed to a metric (or Gram) matrix $[M_{ij}]$ by using $M_{ij} = \left(D_{i0}^{2} + D_{j0}^{2} - D_{ij}^{2}\right)/2$, B http://www.ncbi.nlm.nih.gov/Structure/cdd/cdd.shtml $ % %& %' % where $D_{i0}$ is the distance between the $i$th fold and the geometric centroid of all $N = 498$ folds. The eigenvalues of the metric matrix define an orthogonal system of axes, called factors. These axes pass through the geometric centroid of the points representing all observed folds and correspond to a decreasing order of the amount of information each factor represents. B (( ! )" ! ( ((! ;<= , ' http://ekhidna.biocenter.helsinki.fi:8080/dali/index.html 1 $ $ 0# " D ,' " + " $ $ 3 # - " 2 2 2 / < 4 3 " E 7 . 1 -$ Hb 2 / & F G # + % # & 7 7 4 # 2# 2 4 2 5 5 7 & Mb Hb VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF-DLS-----HGSAQVKGHGKKVADALTNAV ||| .. | |.|| | . | . | | | | | | | .| .| || | || . Mb VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTEAEMKASEDLKKHGVTVLTALGAIL Hb AHVD-DMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR-----| | . || | .. . .| .. | |..| . . | | . ||. Mb KK-KGHHEAELKPLAQSHATKHKIPIKYLEFISEAIIHVLHSRHPGDFGADAQGAMNKALELFRKDIAAKYKELGYQG , " / 4 3 "" >" 1 % -$ < " $ & " 2 3 ; < 1 '1 / / + + ) 7 # • 3 & - 2 7 :α5:α * # # 3 & , # ,- .+ C . • - θ 0 +' . - 8 & * '1 +θ 0 1 2 φ0 . φ+ 1 + +Σ Σ φ * . 3 & , -1 . -2 " 1 '1 , θ " 2 4 φΕ + -1 3 & . -2 3 • θ -+ 4+ 2 3 5 - - 1 $ $ % " C ; + = + & < . & ' " < !%%* !% - A A %. 
9 2# # # 8 2 9 # # G , 3 " 52 9 # # " + # # G + 8 + 8 .< 9 ' 2 5: 7 # 5 J & 2 # 7 # 4 I H# # 2 0 $ # 7 4 4 2 2 2 7 # 9 # 7 & # 67 #' " : :α5:α , - 7 + " & + 6 " 7α # (& # 67 " % , ; 1 $ $ . - !E D + & *5 , & , " θ4 * ' 4 # 4 # Asp tRNA Synthetase CspA Gene 5 ssDNA Binding Protein CspB & % " # " 4( " < Staphylococcal Nuclease % " 4 " " $ $ 5 & '& ' . > ) > > $ Topoisomerase I ' > % http://ub.cbm.uam.es/mammoth/mult/index.php %!< %@ 5 ) -!E E ). 2 2 STING Millennium: 3 2 3 # 2 & 3 # G G G 2 3 2 #G # 2 B G 4 2 & 3 7 2 G # & 7 & G # 2 3 %? 3 6 4 2 = #2 Nucleic Acids Research, 2003, Vol. 31, No. 13, 3386-3392 Nucleic Acids Research, 2003, Vol. 31, No. 13, 3386-3392