Uploaded by Cherrymae Ybanez

Ybañez,Cherry Mae Final-Research-Project -ITE 14

advertisement
Name: Cherry Mae P. Ybañez
ITE 14-IQR1
Cocke–Younger–Kasami or CYK ALGORITHM
Cocke–Younger–Kasami algorithm (CYK)
Class: Parsing with context-free grammars
Data structure: String
Worst-case performance: $O(n^3 \cdot |G|)$, where:


n is length of the string
|G| is the size of the CNF grammar
The CYK algorithm is a parsing algorithm for context-free grammars. To apply the
CYK algorithm to a grammar, the grammar must be in Chomsky Normal Form. It uses
dynamic programming to decide whether a string is in the language of a grammar.
• The membership problem:
– Problem:
• Given a context-free grammar G and a string w
– G = (V, ∑ ,P , S) where
» V finite set of variables
» ∑ (the alphabet) finite set of terminal symbols
» P finite set of rules
» S start symbol (distinguished element of V)
» V and ∑ are assumed to be disjoint
– G is used to generate the string of a language
– Question: Is w in L(G)?
The CYK Algorithm
• J. Cocke
• D. Younger,
• T. Kasami
– Independently developed an algorithm to answer this question.
The CYK Algorithm Basics
– The Structure of the rules in a Chomsky Normal Form grammar
– Uses a “dynamic programming” or “table-filling algorithm”
Chomsky Normal Form
• Normal Form is described by a set of conditions that each rule in
the grammar must satisfy
• Context-free grammar is in CNF if each rule has one of the
following forms:
– A → BC (exactly two variables on the right side)
– A → a (a single terminal symbol)
– S → λ (the null string)
where B, C ∈ V – {S}
Construct a Triangular Table
• Each row corresponds to one length of substrings
– Bottom Row – Strings of length 1
– Second from Bottom Row – Strings of length 2
– Top Row – string ‘w’
Construct a Triangular Table
• $X_{i,i}$ is the set of variables A such that
A → $w_i$ is a production of G
• Compare at most n pairs of previously computed sets:
$(X_{i,i}, X_{i+1,j}), (X_{i,i+1}, X_{i+2,j}), \ldots, (X_{i,j-1}, X_{j,j})$
THEOREM
• The CYK Algorithm correctly computes $X_{i,j}$ for all i and j; thus w is in
L(G) if and only if S is in $X_{1,n}$.
• The running time of the algorithm is $O(n^3)$.
Here’s another example;
S -> AB | BC
A -> BA | a
B -> CC | b
C -> AB | a
We check if baaba is in L(G):
1. We first insert single length rules into our table.
2. We then fill the remaining cells of our table.
3. We observe that S is in the cell (1, 5), Hence, the string baaba
belongs to L(G).
Time and Space Complexity :
 Time Complexity –
$O(n^3 \cdot |G|)$
Where |G| is the number of rules in the given grammar.

Space Complexity –
$O(n^2)$
Advantages of CYK algorithm:
CYK is a bottom-up chart parsing algorithm characterized by 3 interesting
features:
• Its worst-case parsing complexity is $O(n^3)$ (where n is the number
of words of the sentence to be analyzed);
• It is a very simple algorithm that is easy to implement;
• It can provide partial analysis of syntactically correct subsequences
of syntactically incorrect sequences.
Disadvantages of the CYK algorithm:
However, its standard implementation suffers from two important
drawbacks:
• The CF grammar used by the parser has to be in a predefined
format (the Chomsky normal form) and therefore the grammar
usually needs to be first converted into this predefined format;
• The complexity is always $O(n^3)$ even when the grammar is in fact
regular.
CODES
// C Program for CYK Algorithm//
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
/*
 * checkNTR - report whether ch is one of the allowed non-terminal symbols.
 *
 * The allowed non-terminals are the upper-case letters
 * 'S', 'A', 'B', 'C', 'D', 'E', 'F', 'G' and 'H'.
 *
 * Returns true when ch is in that set, false otherwise.
 */
bool checkNTR(char ch){
    static const char big[] = {'S','A','B','C','D','E','F','G','H'};

    /* The table is not NUL-terminated, so its length comes from sizeof,
     * not strlen. */
    for (size_t i = 0; i < sizeof big / sizeof big[0]; i++) {
        if (ch == big[i]) {
            return true;
        }
    }
    return false;
}
/*
 * checkTR - report whether ch is one of the allowed terminal symbols.
 *
 * The allowed terminals are the lower-case letters 'a', 'b', 'c', 'd'
 * and 'e' (the program's prompts describe 'e' as epsilon/lambda).
 *
 * Returns true when ch is in that set, false otherwise.
 */
bool checkTR(char ch){
    static const char small[] = {'a','b','c','d','e'};

    /* The table is not NUL-terminated, so its length comes from sizeof,
     * not strlen. */
    for (size_t i = 0; i < sizeof small / sizeof small[0]; i++) {
        if (ch == small[i]) {
            return true;
        }
    }
    return false;
}
/*
 * checkPR - validate the right-hand side of a production rule.
 *
 * ch is a NUL-terminated string. It is accepted when it is either
 *   - exactly one character that checkTR() accepts (a terminal), or
 *   - exactly two characters that checkNTR() both accepts (two
 *     non-terminals).
 *
 * Returns true for an accepted right-hand side, false for anything else
 * (including the empty string and strings longer than two characters).
 */
bool checkPR(char ch[]){
    const size_t len = strlen(ch);   /* measured once, used for both cases */

    if (len == 1) {
        return checkTR(ch[0]);
    }
    if (len == 2) {
        return checkNTR(ch[0]) && checkNTR(ch[1]);
    }
    return false;
}
/*
 * checkEle - report whether character ch occurs in the NUL-terminated
 * string El.
 *
 * Returns true when ch is found before the terminator, false otherwise
 * (so searching for '\0' itself yields false).
 */
bool checkEle(char El[], char ch){
    for (size_t i = 0; El[i] != '\0'; i++) {
        if (ch == El[i]) {
            return true;
        }
    }
    return false;
}

enum {
    PROD_SIZE  = 3,    /* right-hand side: at most two symbols plus NUL     */
    CELL_SIZE  = 10,   /* table cell: nine distinct non-terminals plus NUL  */
    INPUT_SIZE = 64,   /* line buffer: at most INPUT_SIZE - 1 characters    */
    MAX_RULES  = 100   /* upper bound on rules, keeps the VLAs small        */
};

/*
 * read_line - read one line from stdin into buf (capacity size).
 *
 * Pending output is flushed first so prompts without a trailing newline are
 * visible. The trailing "\n" or "\r\n" is removed. When the line does not
 * fit, the rest of it is discarded so the next read starts on a fresh line.
 *
 * Returns true when a complete line was stored, false on end of input,
 * read error, or an over-long line.
 */
static bool read_line(char *buf, size_t size){
    fflush(stdout);
    if (fgets(buf, (int)size, stdin) == NULL) {
        return false;
    }

    size_t len = strcspn(buf, "\r\n");
    if (buf[len] == '\0' && len + 1 == size) {
        /* Buffer is full and holds no newline: consume up to the newline
         * and note whether any real characters had to be dropped. */
        bool dropped = false;
        int c;
        while ((c = getchar()) != '\n' && c != EOF) {
            dropped = true;
        }
        if (dropped) {
            return false;
        }
    }
    buf[len] = '\0';
    return true;
}

/*
 * add_symbol - append sym to the NUL-terminated table cell unless it is
 * already present or the cell (capacity CELL_SIZE) is full.
 */
static void add_symbol(char cell[], char sym){
    size_t used = strlen(cell);

    if (!checkEle(cell, sym) && used + 1 < CELL_SIZE) {
        cell[used] = sym;
        cell[used + 1] = '\0';
    }
}

/*
 * main - interactive CYK membership test.
 *
 * Reads, one item per line from stdin:
 *   1. the number of production rules,
 *   2. the generating symbols, each with its number of productions,
 *   3. the right-hand side of every production (validated with checkPR),
 *   4. the input string.
 * It then fills the CYK table, where tb[i][j] holds the non-terminals that
 * derive the substring from position i to position j (0-based, inclusive),
 * prints the table, and reports whether 'S' derives the whole string.
 *
 * An empty input string is rejected with "Empty String!"; 'e' in the input
 * is matched like any other terminal.
 *
 * Returns 0 after a complete run and -1 on any invalid input.
 */
int main(void){
    printf("DEMO Input for Production : S->AB, S->e, A->a, B->b :\n\n");
    printf("No. of production rules = 4\n\nEnter generating symbols:\n\n"
           "Symbol Count\nS 2\nA 1\nB 1\n\nEnter Production rules :\n\n"
           "Non- Terminal -> Production\nS -> AB\nS -> e\nA -> a\nB -> b\n\n\n");
    printf("Allowed Symbols: 'S','A','B','C','D','E','F','G','H' \n");
    printf("Allowed terminals: 'a','b','c','d' and 'e':epsilon/lambda\n");
    printf("\n\nNo. Of production rules = ");

    char line[INPUT_SIZE];
    int n_pr = 0;
    if (!read_line(line, sizeof line) || sscanf(line, "%d", &n_pr) != 1
            || n_pr < 1 || n_pr > MAX_RULES) {
        printf("Invalid Input for 'No. of production rules'. Please try again.\n\nExiting...\n");
        return -1;
    }

    char ntr[n_pr];               /* left-hand side symbols, in entry order */
    int  ntr_cnt[n_pr];           /* number of productions per symbol       */
    char pr[n_pr][PROD_SIZE];     /* right-hand sides, in entry order       */

    printf("\nEnter generating symbols:\n\nSymbol Count\n");
    int n_sym = 0;                /* number of symbol/count pairs entered   */
    int n_pr2 = 0;                /* productions accounted for so far       */
    while (n_pr2 < n_pr) {
        char sym = '\0';
        int cnt = 0;
        if (!read_line(line, sizeof line)
                || sscanf(line, " %c %d", &sym, &cnt) != 2
                || !checkNTR(sym)) {
            printf("Invalid Input. Please choose from 'Allowed Symbols' and try again.\n\nExiting...\n");
            return -1;
        }
        if (cnt < 1) {
            printf("Invalid Input. Count must be at least 1.\n\nExiting...\n");
            return -1;
        }
        /* Every count is >= 1, so n_sym <= n_pr2 < n_pr and the writes
         * below stay in bounds. Overshooting the announced total is the
         * mismatch the rule-count check reports. */
        if (cnt > n_pr - n_pr2) {
            printf("Invalid Input for 'No. of production rules'. Please try again.\n\nExiting...\n");
            return -1;
        }
        ntr[n_sym] = sym;
        ntr_cnt[n_sym] = cnt;
        n_pr2 += cnt;
        n_sym++;
    }

    printf("\n\nEnter Production rules : \n");
    printf("\nNon- Terminal -> Production\n");
    int p = 0;                    /* running index into pr[]                */
    for (int i = 0; i < n_sym; i++) {
        for (int j = 0; j < ntr_cnt[i]; j++, p++) {
            printf("%c -> ", ntr[i]);
            if (!read_line(line, sizeof line) || !checkPR(line)) {
                printf("Invalid Input. Please write correct CNF and try again.\n\nExiting...\n");
                return -1;
            }
            /* checkPR accepted the line, so it has 1 or 2 characters and
             * fits in PROD_SIZE with its terminator. */
            memset(pr[p], 0, sizeof pr[p]);
            memcpy(pr[p], line, strlen(line));
        }
    }

    /* Echo the grammar back in entry order. */
    p = 0;
    for (int i = 0; i < n_sym; i++) {
        for (int j = 0; j < ntr_cnt[i]; j++, p++) {
            printf("\n%c -> %s", ntr[i], pr[p]);
        }
    }

    printf("\n\nInput String: ");
    char s1[INPUT_SIZE];
    if (!read_line(s1, sizeof s1)) {
        printf("Invalid Input. Could not read the input string (at most %d characters).\n\nExiting...\n",
               INPUT_SIZE - 1);
        return -1;
    }
    int n = (int)strlen(s1);
    if (n == 0) {
        /* Checked before the table is declared: a zero-length VLA is invalid. */
        printf("Empty String!\n");
        return -1;
    }

    char tb[n][n][CELL_SIZE];     /* CYK table; only i <= j cells are filled */
    memset(tb, 0, sizeof tb);

    /* Substrings of length 1: A is in tb[i][i] when A -> s1[i] is a rule. */
    for (int i = 0; i < n; i++) {
        p = 0;
        for (int a = 0; a < n_sym; a++) {
            for (int b = 0; b < ntr_cnt[a]; b++, p++) {
                if (checkTR(pr[p][0]) && pr[p][0] == s1[i]) {
                    add_symbol(tb[i][i], ntr[a]);
                }
            }
        }
    }

    /* Longer substrings: for every split point k, A is in tb[i][j] when
     * A -> BC is a rule with B in tb[i][k] and C in tb[k+1][j]. Results of
     * all split points accumulate in the same cell. */
    for (int len = 1; len <= n - 1; len++) {
        for (int i = 0; i + len < n; i++) {
            int j = i + len;
            for (int k = i; k < j; k++) {
                p = 0;
                for (int a = 0; a < n_sym; a++) {
                    for (int b = 0; b < ntr_cnt[a]; b++, p++) {
                        if (checkNTR(pr[p][0])
                                && checkEle(tb[i][k], pr[p][0])
                                && checkEle(tb[k + 1][j], pr[p][1])) {
                            add_symbol(tb[i][j], ntr[a]);
                        }
                    }
                }
            }
        }
    }

    printf("\n\n\nTable:\n\n");
    for (int i = 0; i < n; i++) {
        printf("|");
        for (int j = 0; j < n; j++) {
            printf("%s |", tb[i][j]);
        }
        printf("\n");
    }

    /* The whole string is derived when the start symbol is in the cell
     * covering positions 0..n-1. */
    if (checkEle(tb[0][n - 1], 'S')) {
        printf("\n\nThis String is Accepted!\n\n");
    } else {
        printf("\n\nThis String is Rejected!\n\n");
    }
    return 0;
}
References:
https://www.geeksforgeeks.org/cyk-algorithm-for-context-free-grammar/amp/
https://github.com/shah-deep/CYK-algorithm-in-C-program
Download