tokenizer.c (4601B)
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 5 #include "../global/types.h" 6 #include "../global/util.h" 7 8 #define MAXARGS 8 9 #define MAXFUNCS 2048 10 #define MAXVARS 8192 11 12 int getBuiltIn(char *func, ast_node *node); // checks if a function is built in to zippy 13 void expressFunction(char *function, ast_node *node); // puts a string into the ast_node struct 14 ast_node *tokenize(char *input); // does the tokenization 15 void printAst(ast_node *root); // shows an ast and its sub nodes 16 17 int getBuiltIn(char *func, ast_node *node){ // returns NIL when the function doesn't exist 18 if (strcmp(func, "defun") == 0){ 19 node->func->builtInFunc= DEFUN; 20 }else if (strcmp(func, "let") == 0){ 21 node->func->builtInFunc = LET; 22 }else if (strcmp(func, "set") == 0){ 23 node->func->builtInFunc = SET; 24 }else if (strcmp(func, "if") == 0){ 25 node->func->builtInFunc = IF; 26 }else if (strcmp(func, "elif") == 0){ 27 node->func->builtInFunc = ELIF; 28 }else if (strcmp(func, "else") == 0){ 29 node->func->builtInFunc = ELSE; 30 }else if (strcmp(func, "for") == 0){ 31 node->func->builtInFunc = FOR; 32 }else if (strcmp(func, "while") == 0){ 33 node->func->builtInFunc = WHILE; 34 }else if (strcmp(func, "symbol") == 0){ 35 node->func->builtInFunc = SYMBOL; 36 }else if (strcmp(func, "+") == 0){ 37 node->func->builtInFunc = ADD; 38 }else if (strcmp(func, "-") == 0){ 39 node->func->builtInFunc = SUB; 40 }else if (strcmp(func, "*") == 0){ 41 node->func->builtInFunc = MUL; 42 }else if (strcmp(func, "/") == 0){ 43 node->func->builtInFunc = DIV; 44 }else if (strcmp(func, "=") == 0){ 45 node->func->builtInFunc = EQ; 46 }else if (strcmp(func, "!=") == 0){ 47 node->func->builtInFunc = NEQ; 48 }else if (strcmp(func, ">") == 0){ 49 node->func->builtInFunc = GT; 50 }else if (strcmp(func, "<") == 0){ 51 node->func->builtInFunc = LT; 52 }else if (strcmp(func, ">=") == 0){ 53 node->func->builtInFunc = GTEQ; 54 }else if (strcmp(func, "<=") == 0){ 55 node->func->builtInFunc = LTEQ; 56 }else if (strcmp(func, "cast") == 0){ 57 node->func->builtInFunc = CAST; 58 }else if (strcmp(func, "typeof") == 0){ 59 node->func->builtInFunc = TYPEOF; 60 }else if (strcmp(func, "exit") == 0){ 61 node->func->builtInFunc = EXIT; 62 }else if (strcmp(func, "return") == 0){ 63 node->func->builtInFunc = RETURN; 64 }else if (strcmp(func, "write") == 0){ 65 node->func->builtInFunc = WRITE; 66 }else { 67 node->func->builtInFunc = NIL; 68 return -1; 69 } 70 return 0; 71 } 72 73 void expressFunction(char *function, ast_node *node){ 74 node->func = CheckedMalloc(sizeof(functionToken)); 75 if ((getBuiltIn(function, node)) == NIL) // non user defined function 76 node->func->name = function; 77 } 78 79 ast_node *tokenize(char *input){ 80 ast_node *node, *child; 81 82 char *exp, *function, **args; 83 size_t i = 0, argCount = -1; 84 int depth = 0; 85 86 node = CheckedMalloc(sizeof(ast_node)); 87 node->args = CheckedMalloc(sizeof(ast_node) * MAXARGS); 88 node->literalArgs = CheckedMalloc(sizeof(void *) * MAXARGS); 89 90 if (input[i] == '('){ 91 depth = 1; 92 i++; 93 exp = CheckedMalloc(strlen(input)); 94 while (depth != 0){ 95 if (input[i] == ' ') argCount++; 96 if (input[i] == '('){ 97 child = tokenize(&input[i]); 98 node->args[argCount] = child; 99 depth++; 100 } else if (input[i] == ')'){ 101 depth--; 102 } 103 exp[i - 1] = input[i]; 104 if (input[i] == '\0'){ 105 fprintf(stderr, "error brace not closed\n"); 106 exit(1); 107 } 108 i++; 109 } 110 exp[i-2] = '\0'; 111 exp = CheckedRealloc(exp, strlen(exp) + 1); 112 }else if (input[i] == '"'){ 113 i++; 114 while (input[i] != '"') i++; 115 } 116 117 118 i = 0; 119 function = CheckedMalloc(strlen(exp)); 120 while (exp[i] != ' '){ 121 function[i] = exp[i]; 122 i++; 123 } 124 125 function[i] = '\0'; 126 function = CheckedRealloc(function, i); 127 128 expressFunction(function, node); 129 130 char *tok, *saveptr, *expptr = exp; 131 132 exp = strstr(exp, " "); 133 tok = strtok_r(exp, " ", &saveptr); 134 135 argCount = 0; 136 depth = 0; 137 do { 138 if (node->args[argCount] != NULL){ 139 argCount++; 140 } 141 if (tok[0] != '(' && tok[strlen(tok)-1] != ')' && depth == 0){ 142 if (node->args[argCount] == NULL){ 143 node->literalArgs[argCount] = giveType(tok); 144 } 145 argCount++; 146 } 147 148 if (tok[0] == '(') depth++; 149 if (tok[strlen(tok)-1] == ')') depth--; 150 tok = strtok_r(NULL, " ", &saveptr); 151 } while (tok != NULL); 152 153 free(expptr); 154 155 return node; 156 } 157 158 void printAst(ast_node *root){ 159 printf("-----------\n"); 160 if (root->func->builtInFunc == -1) printf("function: %s\n", root->func->name); 161 else printf("function (built in): %d\n", root->func->builtInFunc); 162 for (int i = 0; i < MAXARGS + 1; i++){ 163 if (root->args[i] != NULL) printAst(root->args[i]); 164 else { 165 if (root->literalArgs[i] != NULL) printf("%s\n", root->literalArgs[i]); 166 } 167 } 168 printf("-----------\n"); 169 }