Chapter 10 - Character Strings Array of Characters char word[] = { ‘H’, ‘e’, ‘l’, ‘l’, ‘o’, ‘!’ }; word[0] word[1] word[2] word[3] word[4] word[5] 'H' 'e' 'l' 'l' 'o' '!' Program 10.1 /* * Function to concatenate two character strings */ #include <stdio.h> concat (char result[], char str1[], int n1, char str2[], int n2) { int i; // copy str1 to result for (i = 0; i < n1; i++) result[i] = str1[i]; for (i = 0; i < n2; i++) result[n1 + i] = str2[i]; } Program 10.1 (continued) main() { char s1[5] = {'T', 'e', 's', 't', ' '}; char s2[6] = {'w', 'o', 'r', 'k', 's', '.'}; char s3[11]; int i; concat (s3, s1, 5, s2, 6); for (i = 0; i < 11; i++) printf("%c", s3[i]); printf ("\n"); system ("PAUSE"); } Program 10.1 Output Variable Length Character Strings char word[] = { ‘H’, ‘e’, ‘l’, ‘l’, ‘o’, ‘!’, ‘\0’ }; word[0] word[1] word[2] word[3] word[4] word[5] word[6] 'H' 'e' 'l' 'l' 'o' '!' '\0' NULL Character Terminates a String Program 10.2 /* * Function to count the number of characters in a string */ #include <stdio.h> int string_length (char string[]) { int count = 0; while(string[count] != '\0') count++; return (count); } Program 10.2 (continued) main() { char word1[] = {'a', 's', 't', 'e', 'r', '\0'}; char word2[] = {'a', 't', '\0'}; char word3[] = {'a', 'w', 'e', '\0'}; int i; printf ("%i %i %i\n", string_length (word1), string_length (word2), string_length (word3)); system ("PAUSE"); } Program 10.2 Output Initializing and Displaying Character Strings char word[] = “Hello!”; These Statements Are Equivalent char word[] = { ‘H’, ‘e’, ‘l’, ‘l’, ‘o’, ‘!’, ‘\0’ }; char word[7] = “Hello!”; char word[6] = “Hello!”; Leave Room for the NULL Character Program 10.3 /* * Function to concatenate two character strings */ #include <stdio.h> concat (char result[], char str1[], char str2[]) { int i; int j; // copy str1 to result for (i = 0; str1[i] != '\0'; i++) result[i] = str1[i]; for (j = 0; str2[j] != '\0'; j++) result[i + j] = str2[j]; result[i+j] = '\0'; } Program 10.3 (continued) 
main() { char s1[] = "Test "; char s2[] = "works."; char s3[20]; concat (s3, s1, s2); printf("%s\n", s3); system ("PAUSE"); } Program 10.3 Output Testing Two Character Strings for Equality Since the C Programming Language does not support a data type of string we cannot directly test two strings to see if they are equal with a statement such as if ( string1 == string2 ) <string.h> Program 10.4 /* * Function to determine if two strings are equal */ #include <stdio.h> #define TRUE 1 #define FALSE 0 int equal_strings (char s1[], char s2[]) { int i = 0; while ((s1[i] == s2[i]) && (s1[i] != '\0') && (s2[i] != '\0')) i++; if ((s1[i] == '\0') && (s2[i] == '\0')) return(TRUE); else return(FALSE); } Program 10.4 (continued) main() { char stra[] = "string compare test"; char strb[] = "string"; printf ("%i\n", equal_strings (stra, strb)); printf ("%i\n", equal_strings (stra, stra)); printf ("%i\n", equal_strings (strb, "string")); system ("PAUSE"); } Program 10.4 Output Inputting Character Strings char string[81]; scanf ( “%s”, string); char s1[81], s2[81], s3[81]; scanf ( “%s%s%s”, s1, s2, s3); Program 10.5 /* * Program to illustrate the %s scanf format characters */ #include <stdio.h> main() { char s1[81]; char s2[81]; char s3[81]; printf ("Enter text:\n"); scanf ("%s%s%s", s1, s2, s3); printf ("\ns1 = %s\ns2 = %s\ns3 = %s\n", s1, s2, s3); system ("PAUSE"); } Program 10.5 Output Program 10.6 /* * Function to read a line of text from a terminal */ #include <stdio.h> read_line (char buffer[]) { char character; int i = 0; do { character = getchar(); buffer[i] = character; i++; } while (character != '\n'); buffer[i-1] = '\0'; } Program 10.6 (continued) main() { char line[81]; int i; for (i=0; i < 3; i++) { read_line(line); printf ("%s\n\n", line); } system ("PAUSE"); } Program 10.6 Output Program 10.7 #include <stdio.h> #define TRUE 1 #define FALSE 0 /* * Function to determine if a character is alphabetic */ alphabetic (char c) { if (('a' <= c && c <= 'z') || ('A' <= c && c <= 
'Z')) return (TRUE); else return (FALSE); } Program 10.7 (continued) /* * Function to count the number of words in a string */ count_words (char string[]) { int looking_for_word = TRUE; int word_count = 0; int i; for (i=0; string[i] != '\0'; i++) { if (alphabetic(string[i])) { if (looking_for_word) { word_count++; looking_for_word = FALSE; } } else looking_for_word = TRUE; } return (word_count); } Program 10.7 (continued) main() { char text1[] = "Well, here goes."; char text2[] = "And here we go... again."; printf ("%s - words = %i\n", text1, count_words (text1)); printf ("%s - words = %i\n", text2, count_words (text2)); system ("PAUSE"); } Program 10.7 Output Execution of count_words Function i 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 string[i] word_count looking_for_word 'W' 'e' 'l' 'l' ',' ' ' 'h' 'e' 'r' 'e' ' ' 'g' 'o' 'e' 's' '.' '\0' 0 1 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 3 1 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 1 1 The NULL String A Character String that contains no characters other than the NULL Character has a special name in the C Programming Language: it is called the NULL String. The string_length function will correctly return 0 for it. char buffer[100] = ""; Program 10.8 #include <stdio.h> #define TRUE 1 #define FALSE 0 /***** Insert alphabetic function here *****/ /***** Insert read_line function here *****/ /***** Insert count_words function here *****/ Program 10.8 (continued) main() { char text[81]; int end_of_text = 0; int total_words = 0; printf ("Type in your text.\n"); printf ("When you are done, press 'RETURN'.\n\n"); while (!end_of_text) { read_line (text); if (text[0] == '\0') end_of_text = TRUE; else total_words += count_words (text); } printf("\nThere are %i words in the above text.\n", total_words); system ("PAUSE"); } Program 10.8 Output Escape Characters Escape Character \a \b \f \n \r \t \v \\ \" \' \? 
\nnn \xnn Name audible alert backspace form feed newline carriage return horizontal tab vertical tab backslash double quote single quote question mark octal character value nnn hexadecimal character value nn More on Constant Strings If you put a backslash character at the very end of a line and follow it immediately with a carriage return, it tells the C Compiler to ignore the end of the line. This line continuation technique is used primarily for continuing long constant character strings. char letters[] = "abcdefghijklmnopqrstuvwxyz\ ABCDEFGHIJKLMNOPQRSTUVWXYZ"; An even easier way of breaking up long character strings is to divide them into two or more adjacent strings. char letters[] = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; Character Strings, Structures, and Arrays Suppose we wanted to write a computer program that acted as a dictionary. One of the first thoughts would be how to represent the word and its definition. Since the word and its definition are logically related, the notion of a structure comes immediately to mind. 
struct entry { char word[10]; char definition[50]; }; struct entry dictionary[100]; Program 10.9 /* * Dictionary lookup program */ #include <stdio.h> #define TRUE 1 #define FALSE 0 struct entry { char word[10]; char definition[50]; }; /***** Insert equal_strings function here *****/ Program 10.9 /* * Function to lookup a word inside a dictionary */ int lookup (struct entry dictionary[], char search [], int entries) { int i; for (i=0; i < entries; i++) if (equal_strings (search, dictionary[i].word)) return (i); return (-1); } Program 10.9 main() { struct entry dictionary[100] = { { "aardvark", "a burrowing African mammal" }, { "abyss", "a bottomless pit" }, { "acumen", "mentally sharp; keen" }, { "addle", "to become confused" }, { "aerie", "a high nest" }, { "affix", "to append; attach" }, { "agar", "a jelly made from seaweed" }, { "ahoy", "a nautical call of greeting" }, { "aigrette", "an ornamental cluster of feathers" }, { "ajar", "partially opened" } }; char word[10]; int entries = 10; int entry_number; Program 10.9 printf("Enter word: "); scanf("%9s", word); entry_number = lookup (dictionary, word, entries); if (entry_number != -1) printf ("%s\n", dictionary[entry_number].definition); else printf ("Sorry, that word is not in my dictionary.\n"); system ("PAUSE"); } Program 10.9 Output A Better Search Method Binary Search Algorithm Step 1: Set low to 0, high to n - 1 Step 2: If low > high, x does not exist in M and the algorithm terminates Step 3: Set mid to (low + high) / 2 Step 4: If M[mid] < x, set low to mid + 1 and go to Step 2 Step 5: If M[mid] > x, set high to mid - 1 and go to Step 2 Step 6: M[mid] equals x and the algorithm terminates Binary Search Algorithm /* * Binary Search Algorithm */ int lookup (struct entry M[], char x[], int n) { int low = 0; /* Step 1: */ int high = n - 1; /* Step 1: */ int mid; int result; while (low <= high) { mid = (low + high) / 2; /* Step 3: */ result = compare_strings (M[mid].word, x); if (result == -1) low = mid + 1; /* Step 
4: */ else if (result == 1) high = mid - 1; /* Step 5: */ else return (mid); /* Step 6: */ } return (-1); /* Step 2: */ } Examples of Binary Search Examples of Binary Search Examples of Binary Search Program 10.10 /* * Dictionary lookup program */ #include <stdio.h> struct entry { char word[10]; char definition[50]; }; Program 10.10 (continued) /* * Function to compare two character strings */ int compare_strings (char s1[], char s2[]) { int i = 0; while ((s1[i] == s2[i]) && (s1[i] != '\0') && (s2[i] != '\0')) i++; if (s1[i] < s2[i] ) /* s1 < s2 */ return(-1); else if (s1[i] == s2[i]) /* s1 == s2 */ return (0); else /* s1 > s2 */ return(+1); } Program 10.10 (continued) /* * Function to lookup a word inside a dictionary */ int lookup (struct entry dictionary[], char search [], int entries) { int low = 0; int high = entries - 1; int mid; int result; while (low <= high) { mid = (low + high) / 2; result = compare_strings (dictionary[mid].word, search); if (result == -1) low = mid + 1; else if (result == 1) high = mid - 1; else return (mid); /* found it */ } return (-1); /* not found */ } Program 10.10 (continued) main() { struct entry dictionary[100] = { { "aardvark", "a burrowing African mammal" }, { "abyss", "a bottomless pit" }, { "acumen", "mentally sharp; keen" }, { "addle", "to become confused" }, { "aerie", "a high nest" }, { "affix", "to append; attach" }, { "agar", "a jelly made from seaweed" }, { "ahoy", "a nautical call of greeting" }, { "aigrette", "an ornamental cluster of feathers" }, { "ajar", "partially opened" } }; char word[10]; int entries = 10; int entry_number; Program 10.10 (continued) printf("Enter word: "); scanf("%9s", word); entry_number = lookup (dictionary, word, entries); if (entry_number != -1) printf ("%s\n", dictionary[entry_number].definition); else printf ("Sorry, that word is not in my dictionary.\n"); system ("PAUSE"); } Program 10.10 Output Program 10.10 Output (Rerun) ASCII Character Table 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 0 1 2 3 4 5 6 7 10 11 12 13 14 15 16 17 20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37 40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57 60 61 62 63 64 65 66 67 70 71 72 73 74 75 76 77 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F 0000 0000 0000 0001 0000 0010 0000 0011 0000 0100 0000 0101 0000 0110 0000 0111 0000 1000 0000 1001 0000 1010 0000 1011 0000 1100 0000 1101 0000 1110 0000 1111 0001 0000 0001 0001 0001 0010 0001 0011 0001 0100 0001 0101 0001 0110 0001 0111 0001 1000 0001 1001 0001 1010 0001 1011 0001 1100 0001 1101 0001 1110 0001 1111 0010 0000 0010 0001 0010 0010 0010 0011 0010 0100 0010 0101 0010 0110 0010 0111 0010 1000 0010 1001 0010 1010 0010 1011 0010 1100 0010 1101 0010 1110 0010 1111 0011 0000 0011 0001 0011 0010 0011 0011 0011 0100 0011 0101 0011 0110 0011 0111 0011 1000 0011 1001 0011 1010 0011 1011 0011 1100 0011 1101 0011 1110 0011 1111 NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US SP ! " # $ % & ' ( ) * + , . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? (Null char.) (Start of Header) (Start of Text) (End of Text) (End of Transmission) (Enquiry) (Acknowledgment) (Bell) (Backspace) (Horizontal Tab) (Line Feed) (Vertical Tab) (Form Feed) (Carriage Return) (Shift Out) (Shift In) (Data Link Escape) (XON) (Device Control 1) (Device Control 2) (XOFF)(Device Control 3) (Device Control 4) (Negativ Acknowledgemnt) (Synchronous Idle) (End of Trans. Block) (Cancel) (End of Medium) (Substitute) (Escape) (File Separator) (Group Separator) (Reqst to Send)(Rec. Sep.) 
(Unit Separator) (Space) (exclamation mark) (double quote) (number sign) (dollar sign) (percent) (ampersand) (single quote) (left/open parenthesis) (right/closing parenth.) (asterisk) (plus) (comma) (minus or dash) (dot) (forward slash) (colon) (semi-colon) (less than) (equal sign) (greater than) (question mark) 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 100 101 102 103 104 105 106 107 110 111 112 113 114 115 116 117 120 121 122 123 124 125 126 127 130 131 132 133 134 135 136 137 140 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160 161 162 163 164 165 166 167 170 171 172 173 174 175 176 177 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F 0100 0000 0100 0001 0100 0010 0100 0011 0100 0100 0100 0101 0100 0110 0100 0111 0100 1000 0100 1001 0100 1010 0100 1011 0100 1100 0100 1101 0100 1110 0100 1111 0101 0000 0101 0001 0101 0010 0101 0011 0101 0100 0101 0101 0101 0110 0101 0111 0101 1000 0101 1001 0101 1010 0101 1011 0101 1100 0101 1101 0101 1110 0101 1111 0110 0000 0110 0001 0110 0010 0110 0011 0110 0100 0110 0101 0110 0110 0110 0111 0110 1000 0110 1001 0110 1010 0110 1011 0110 1100 0110 1101 0110 1110 0110 1111 0111 0000 0111 0001 0111 0010 0111 0011 0111 0100 0111 0101 0111 0110 0111 0111 0111 1000 0111 1001 0111 1010 0111 1011 0111 1100 0111 1101 0111 1110 0111 1111 @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ DEL (AT symbol) (left/opening bracket) (back slash) (right/closing bracket) (caret/circumflex) (underscore) (left/opening brace) (vertical bar) (right/closing brace) (tilde) (delete) ASCII Alphabetic Characters 65 66 67 68 69 70 71 72 73 74 75 76 77 
78 79 80 81 82 83 84 85 86 87 88 89 90 101 102 103 104 105 106 107 110 111 112 113 114 115 116 117 120 121 122 123 124 125 126 127 130 131 132 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F 50 51 52 53 54 55 56 57 58 59 5A 0100 0001 0100 0010 0100 0011 0100 0100 0100 0101 0100 0110 0100 0111 0100 1000 0100 1001 0100 1010 0100 1011 0100 1100 0100 1101 0100 1110 0100 1111 0101 0000 0101 0001 0101 0010 0101 0011 0101 0100 0101 0101 0101 0110 0101 0111 0101 1000 0101 1001 0101 1010 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 141 142 143 144 145 146 147 150 151 152 153 154 155 156 157 160 161 162 163 164 165 166 167 170 171 172 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F 70 71 72 73 74 75 76 77 78 79 7A 0110 0001 0110 0010 0110 0011 0110 0100 0110 0101 0110 0110 0110 0111 0110 1000 0110 1001 0110 1010 0110 1011 0110 1100 0110 1101 0110 1110 0110 1111 0111 0000 0111 0001 0111 0010 0111 0011 0111 0100 0111 0101 0111 0110 0111 0111 0111 1000 0111 1001 0111 1010 a b c d e f g h i j k l m n o p q r s t u v w x y z Character Operations Whenever a character constant or variable is used in an expression, it is automatically converted to, and subsequently treated as, an integer value. For example, ('a' <= c) && (c <= 'z') is equivalent to (97 <= c) && (c <= 122) ASCII Numeric Characters 48 60 30 0011 0000 0 49 61 31 0011 0001 1 50 62 32 0011 0010 2 51 63 33 0011 0011 3 52 64 34 0011 0100 4 53 65 35 0011 0101 5 54 66 36 0011 0110 6 55 67 37 0011 0111 7 56 70 38 0011 1000 8 57 71 39 0011 1001 9 Character Operations Suppose the character variable c contained one of the characters '0' through '9' and that we wished to convert this value into the corresponding integer 0 through 9. Since the digits of virtually all character sets are represented by sequential integer values, we can easily convert c into its integer equivalent by subtracting the character constant '0' from it. 
For example, char c = '5'; int i = c - '0'; c = 53; i = 53 - 48; Program 10.11 /* * Function to convert a string to an integer */ #include <stdio.h> int string_to_integer (char string[]) { int integer_value; int result = 0; int i = 0; for (i=0; ('0' <= string[i]) && (string[i] <= '9'); i++) { integer_value = string[i] - '0'; result = result *10 + integer_value; } return(result); } Program 10.11 (continued) main() { printf ("%i\n", string_to_integer ("245")); printf ("%i\n", string_to_integer ("100") + 25); printf ("%i\n", string_to_integer ("13x5")); system ("PAUSE"); } Program 10.11 Output