2016-09-30 12 views
-1
/* 
    * tokenizer.c 
    */ 

    // Header files 
    #include <stdio.h> 
    #include <string.h> 
    #include <stdlib.h> 
    #include <ctype.h> 

    // constants for the white spaces 
    // and the unprintable values 
    #define BLANK 0x20 
    #define TAB 0x09 
    #define VTAB 0x0b 
    #define FORM_FEED 0x0c 
    #define NEW_LINE 0x0a 
    #define CARRIAGE_RETURN 0x0d 

    // define the boolean variables for ease of access 
    #define TRUE 1 
    #define FALSE 0 

    /* 
    * Tokenizer type. You need to fill in the type as part of your implementation. 
    */ 
    struct TokenizerT_ 
    { 
     // to hold the actual string provided at the argument 
     char *actual_string; 

     // to mover the pointer with in the provided 
     // string 
     char *pointer; 
    }; 

    typedef struct TokenizerT_ TokenizerT; 

    /* 
    * TKCreate creates a new TokenizerT object for a given token stream 
    * (given as a string). 
    * 
    * TKCreate should copy the arguments so that it is not dependent on 
    * them staying immutable after returning. (In the future, this may change 
    * to increase efficiency.) 
    * 
    * If the function succeeds, it returns a non-NULL TokenizerT. 
    * Else it returns NULL. 
    * 
    * You need to fill in this function as part of your implementation. 
    */ 
    TokenizerT *TKCreate(char *ts) 
    { 
     TokenizerT *token = (TokenizerT *)malloc(sizeof(TokenizerT)); 

     // condition to check whether the provided string is 
     // empty or not. If it is empty return NULL 
     if (strlen(ts) == 0) 
     { 
      return NULL; 
     } 

     token->actual_string = (char *)malloc(sizeof(char) * (strlen(ts) + 1)); 

     // copy the provided string into structure's actual string 
     strcpy(token->actual_string, ts); 

     // set the actual string value to the pointer 
     token->pointer = token->actual_string; 

     // return the token 
     return token; 
    } 

    /* 
    * TKDestroy destroys a TokenizerT object. It should free all dynamically 
    * allocated memory that is part of the object being destroyed. 
    * 
    * You need to fill in this function as part of your implementation. 
    */ 
    void TKDestroy(TokenizerT *tk) 
    { 
     // set the pointer to null 
     tk->pointer = NULL; 

     // free the memory of the pointer 
     free(tk->actual_string); 

     // free the structure pointer 
     free(tk); 
    } 

    /* 
    * isWhiteSpace used to determine if the parameter character 
    * is a specified whitespace constant or not. If it contains 
    * returns TRUE else returns FALSE 
    */ 
    int isWhiteSpace(char ch) 
    { 
     if (ch == BLANK || ch == NEW_LINE || ch == VTAB || ch == TAB || ch == CARRIAGE_RETURN || ch == FORM_FEED) 
     { 
      return TRUE; 
     } 
     else 
     { 
      return FALSE; 
     } 
    } 

    /* 
    * getWhiteSpace: Returns the type of error is the character is? 
    */ 

getWhitespace function gives me the "error: not all control paths return value". I know this error has been asked previously but I did not get how to fix in my code here.代碼是將字符串分解爲令牌。調試我的C代碼會拋出異常。我不知道如何處理異常使用C

char *getWhiteSpace(char ch) 
    { 
     switch (ch) 
     { 
     case BLANK: 
      return "0x20"; 

     case TAB: 
      return "TAB"; 

     case VTAB: 
      return "0x0b"; 

     case NEW_LINE: 
      return "0x0a"; 

     case CARRIAGE_RETURN: 
      return "0x0d"; 

     case FORM_FEED: 
      return "0x0c"; 
     } 
    } 
    /* 
    * TKGetNextToken returns the next token from the token stream as a 
    * character string. Space for the returned token should be dynamically 
    * allocated. The caller is responsible for freeing the space once it is 
    * no longer needed. 
    * 
    * If the function succeeds, it returns a C string (delimited by '\0') 
    * containing the token. Else it returns 0. 
    * 
    * You need to fill in this function as part of your implementation. 
    */ 
    char *TKGetNextToken(TokenizerT *tk) 
    { 
     // Copy the pointer of the structure to the temporary variable 
     // so as to not to modify the original 
     char *temp_ptr = tk->pointer; 

     // define length variable 
     int length = 0; 

     // condition to check whether the given character contains 
     // constant whitespace 
     if (isWhiteSpace(*temp_ptr)) 
     { 
      while (isWhiteSpace(*temp_ptr)) 
      { 
       char *whiteChar = getWhiteSpace(*temp_ptr); 

       // print the respective error message 
       printf("Error : [%s]\n", whiteChar); 
       // increment the pointer 
       *temp_ptr++; 
      } 
     } 
     // store the original length of temp_ptr 
     length = strlen(temp_ptr); 

     // define a new pointer to allocate the token that 
     // is been tokenized 
     char *token = (char *)malloc(length + 1); 

     // copy the string from the pointer of the structure 
     // till the length 
     int i = 0; 

     // to know the length of the string to token at 
     while (!isWhiteSpace(temp_ptr[i]) && i != length) 
     { 
      i++; 
     } 

     // copy the string into the token 
     strncpy(token, temp_ptr, i); 

     // set the end of the string with string terminator character 
     token[i] = '\0'; 

     int j = 0; 

In the for loop I am getting error: signed/unsigned mismatch error. I have declared the type of the j as 'int' so what is really giving me the error and how to fix it.

 // modify the pointer of the struture TokenizerT 
     for (j = 0; j < length - strlen(token); j++) 
     { 
      tk->pointer[j] = temp_ptr[i]; 
      i++; 
     } 
     tk->pointer[j] = '\0'; 

     // return the token   
     return token; 
    } 


    /* 
    * isDecimal: This accepts a string returns a boolean value if the given string is 
    * a decimal value. 
    */ 
    int isDecimalValue(char *s) 
    { 
     // copy the parameter string into local string pointer 
     char *temp_ptr = s; 

     // loop through the string until string terminator character is found 
     while (*temp_ptr != '\0') 
     { 
      // by using isdigit function find whether the given 
      // character is not a digit. 
      if (!isdigit(*temp_ptr++)) 
      { 
       // then return false 
       return FALSE; 
      } 
     } 

     // then return true value 
     return TRUE; 
    } 

    /* 
    * isFloat: This accepts a string and return a boolean value 
    * This is used to check whether provided string is a floating pointing. 
    * This also checks for decimal pointer and/or for an e-notation. 
    */ 
    int isFloatValue(char *s) 
    { 
     // define a temporary string variable 
     char *temp_ep = NULL; 

     // convert the string to long 
     long i = strtol(s, &temp_ep, 0); 

     // condition to check whether the temporary value is 
     // not converted to long or int 
     if (!*temp_ep) 
     { 
      // if the condition is true return false 
      return FALSE; 
     } 

     // condition to check whether the temporary pointer 
     // contains 'e' or 'E' or '.'(period) 
     if (*temp_ep == 'e' || *temp_ep == 'E' || *temp_ep == '.') 
     { 
      // if the condition is true return true 
      return TRUE; 
     } 

     // other wise return false 
     return FALSE; 
    } 


    /* 
    * isOctal: That accepts a string and returns a boolean value 
    * This is used to check if the string is an octal number. 
    * Whereas the first digit is a zero and the proceeding digits are 0-7 
    */ 
    int isOctalValue(char *s) 
    { 
     // copy the string to local variable 
     char *temp_ptr = s; 

     // condition to check whether the first character is '0' or not 
     if (*temp_ptr != '0') 
     { 
      // if true return false 
      return FALSE; 
     } 

     // increment the pointer to check the rest of the characters 
     // contain the octal value(0-7) 
     *temp_ptr++; 

     // loop though until the character is not 8 and 9 
     // and is it also a digit 
     while (isdigit(*temp_ptr) && *temp_ptr != '8' && *temp_ptr != '9') 
     { 
      // increment the pointer 
      *temp_ptr++; 

      // Check whether it is a string terminator. 
      if (*temp_ptr == '\0') 
      { 
       // if string satisfies all the conditions 
       // then return true 
       return TRUE; 
      } 
     } 

     // return false, if the string in a not an octal 
     // number 
     return FALSE; 
    } 


    /* 
    * isHex: This accepts a string and returns a boolean value 
    * This checks if the string begins with '0x' or '0X' and contains 
    * digits 0-9 and alpha A-F characters only 
    */ 
    int isHexValue(char *s) 
    { 
     // copy the string to local variable 
     char *temp_ptr = s; 

     // condition to check whether the starting value contains 
     // '0x' or '0X' 
     if (*temp_ptr++ == '0' && (*temp_ptr == 'x' || *temp_ptr == 'X')) 
     { 
      // loop through the pointer until it reaches end of 
      // the string 
      while (*(++temp_ptr) != '\0') 
      { 
       // condition to check the character contains 
       // hexa decimal value 
       if (!isxdigit(*temp_ptr)) 
       { 
        // if it is not a hexa decimal value return false 
        return FALSE; 
       } 
      } 

      // if the string is hexa decimal value, return true 
      return TRUE; 
     } 

     // if the string is not hexa decimal value, return false 
     return FALSE; 
    } 

    /* 
    * displayTypeOfToken: Accepts the string and prints the type of token 
    * 
    * Token types: 
    * decimal integer - token is a digit (0-9) followed by any number of digits. 
    * octal integer - token is a 0 followed by any number of octal digits (i.e. 0-7). 
    * hexadecimal integer - token is 0x (or 0X) followed by any number of hexadecimal digits (i.e. 0-9, a-f, A-F). 
    * floating point - token is follows the rules for floating-point constants in Java or C. 
    */ 
    void displayTypeOfToken(char *token) 
    { 
     // condition to check if token is floating point and 
     // display the value 
     if (isFloatValue(token)) 
     { 
      printf("Float: %s\n", token); 
     } 

     // condition to check if token is octal value and 
     // display the value 
     else if (isOctalValue(token)) 
     { 
      printf("Octal: %s\n", token); 
     } 

     // condition to check if token is hexa value and 
     // display the value 
     else if (isHexValue(token)) 
     { 
      printf("Hex: %s\n", token); 
     } 

     // condition to check if token is a decimal value and 
     // display the value 
     else if (isDecimalValue(token)) 
     { 
      printf("Decimal: %s\n", token); 
     } 
     // display if all the above conditon fails 
     else 
     { 
      printf("Invalid: '%s'\n", token); 
     } 

     //Free token memory 
     free(token); 
    } 

    /* 
    * main will have a string argument (in argv[1]). 
    * The string argument contains the tokens. 
    * Print out the tokens in the second string in left-to-right order. 
    * Each token should be printed on a separate line. 
    */ 
    int main(int argc, char **argv) 
    { 
     // declare a char pointer to hold the input string 
     char *input = NULL; 

     // declare a pointer to the structure TokenizerT 
     TokenizerT *tokenizer; 

     // declare a character pointer to hold the tokens called at 
     // each call of TKGetNextToken function 
     char *token; 

     // if the length of the argument at command line is greater than 1 
     // then read the input from command line 
     if (argc > 1) 
     { 
      input = (char *)malloc(strlen(argv[1]) + 1); 
      strcpy(input, argv[1]); 
      input[strlen(argv[1])] = '\0'; 
     } 

     // call the function TKCreate by passing the 
     // input string 
     tokenizer = TKCreate(input); 

     //condition to check if there is any problem in setting the value 
     if (tokenizer == NULL) 
     { 
      return 1; 
     } 

     // Print the input value present 
     printf("Provided input is: \"%s\"\n\n", tokenizer->actual_string); 


     // loop through end of the string 
     while (*(token = TKGetNextToken(tokenizer))) 
     { 
      //token = TKGetNextToken(tokenizer); 
      displayTypeOfToken(token); 
     } 

     // free the memory allocated to the structure pointer 
     TKDestroy(tokenizer); 

     return 0; 
    } 
+0

如果j永遠不小於0,爲什麼使它簽名? – stark

回答

0

要解決的getWhitespace功能,你需要添加一個默認的情況下,返回的東西應該沒有其他情況下可能滿足。

例如

default: 
     return "none"; 

如果你有信心,length - strlen(token)永遠不會返回爲負,那麼你可以將它轉換爲簽訂固定不匹配。

+0

這是一個很好的觀點,但isWhiteSpace()只對6個測試的案例返回true。 –

0

函數getWhitespace返回一個char *。如果ch不符合任何情況怎麼辦?您必須在交換機中添加一個默認情況來處理任何其他值。

函數strlen返回一個類型爲size_t的無符號類型。這就是警告的原因,你比較int(已簽名)和size_t。

+0

這是一個很好的觀點,但isWhiteSpace()只對6個測試案例返回true。 –

+0

是的,但編譯器不知道。您必須在默認情況下捕捉交換機中的所有可能值。 –

+0

我的意思是,代碼只是在用'isWhiteSpace()'檢查後確定字符**是**中的一個,然後用懸掛的情況調用函數。所以這不太可能解決異常問題。 –

相關問題