school

thing1's amazing school repo
Log | Files | Refs | Submodules | README

tokenizer.c (4601B)


      1 #include <stdio.h>
      2 #include <stdlib.h>
      3 #include <string.h>
      4 
      5 #include "../global/types.h"
      6 #include "../global/util.h"
      7 
      8 #define MAXARGS 8
      9 #define MAXFUNCS 2048
     10 #define MAXVARS 8192
     11 
     12 int getBuiltIn(char *func, ast_node *node); // stores the built-in id (or NIL) in node->func->builtInFunc; returns 0 if func is built in to zippy, -1 otherwise
     13 void expressFunction(char *function, ast_node *node); // allocates node->func and classifies the function name through getBuiltIn
     14 ast_node *tokenize(char *input); // builds an ast_node from a parenthesised expression, recursing into nested expressions
     15 void printAst(ast_node *root); // prints a node's function and arguments to stdout, recursing into its sub nodes
     16 
     17 int getBuiltIn(char *func, ast_node *node){ // returns NIL when the function doesn't exist
     18 	if (strcmp(func, "defun") == 0){
     19 		node->func->builtInFunc= DEFUN;
     20 	}else if (strcmp(func, "let") == 0){
     21 		node->func->builtInFunc = LET;
     22 	}else if (strcmp(func, "set") == 0){
     23 		node->func->builtInFunc = SET;
     24 	}else if (strcmp(func, "if") == 0){
     25 		node->func->builtInFunc = IF;
     26 	}else if (strcmp(func, "elif") == 0){
     27 		node->func->builtInFunc = ELIF;
     28 	}else if (strcmp(func, "else") == 0){
     29 		node->func->builtInFunc = ELSE;
     30 	}else if (strcmp(func, "for") == 0){
     31 		node->func->builtInFunc = FOR;
     32 	}else if (strcmp(func, "while") == 0){
     33 		node->func->builtInFunc = WHILE;
     34 	}else if (strcmp(func, "symbol") == 0){
     35 		node->func->builtInFunc = SYMBOL;
     36 	}else if (strcmp(func, "+") == 0){
     37 		node->func->builtInFunc = ADD;
     38 	}else if (strcmp(func, "-") == 0){
     39 		node->func->builtInFunc = SUB;
     40 	}else if (strcmp(func, "*") == 0){
     41 		node->func->builtInFunc = MUL;
     42 	}else if (strcmp(func, "/") == 0){
     43 		node->func->builtInFunc = DIV;
     44 	}else if (strcmp(func, "=") == 0){
     45 		node->func->builtInFunc = EQ;
     46 	}else if (strcmp(func, "!=") == 0){
     47 		node->func->builtInFunc = NEQ;
     48 	}else if (strcmp(func, ">") == 0){
     49 		node->func->builtInFunc = GT;
     50 	}else if (strcmp(func, "<") == 0){
     51 		node->func->builtInFunc = LT;
     52 	}else if (strcmp(func, ">=") == 0){
     53 		node->func->builtInFunc = GTEQ;
     54 	}else if (strcmp(func, "<=") == 0){
     55 		node->func->builtInFunc = LTEQ;
     56 	}else if (strcmp(func, "cast") == 0){
     57 		node->func->builtInFunc = CAST;
     58 	}else if (strcmp(func, "typeof") == 0){
     59 		node->func->builtInFunc = TYPEOF;
     60 	}else if (strcmp(func, "exit") == 0){
     61 		node->func->builtInFunc = EXIT;
     62 	}else if (strcmp(func, "return") == 0){
     63 		node->func->builtInFunc = RETURN;
     64 	}else if (strcmp(func, "write") == 0){
     65 		node->func->builtInFunc = WRITE;
     66 	}else {
     67 		node->func->builtInFunc = NIL;
     68 		return -1;
     69 	}
     70 	return 0;
     71 }
     72 
     73 void expressFunction(char *function, ast_node *node){
     74 	node->func = CheckedMalloc(sizeof(functionToken));
     75 	if ((getBuiltIn(function, node)) == NIL) // non user defined function
     76 		node->func->name = function;
     77 }
     78 
     79 ast_node *tokenize(char *input){
     80 	ast_node *node, *child;
     81 
     82 	char *exp, *function, **args;
     83 	size_t i = 0, argCount = -1;
     84 	int depth = 0;
     85 
     86 	node = CheckedMalloc(sizeof(ast_node));
     87 	node->args = CheckedMalloc(sizeof(ast_node) * MAXARGS);
     88 	node->literalArgs = CheckedMalloc(sizeof(void *) * MAXARGS);
     89 
     90 	if (input[i] == '('){
     91 		depth = 1;
     92 		i++;
     93 		exp = CheckedMalloc(strlen(input));
     94 		while (depth != 0){
     95 			if (input[i] == ' ') argCount++;
     96 			if (input[i] == '('){
     97 				child = tokenize(&input[i]);
     98 				node->args[argCount] = child;
     99 				depth++;
    100 			} else if (input[i] == ')'){
    101 				depth--;
    102 			}
    103 			exp[i - 1] = input[i];
    104 			if (input[i] == '\0'){
    105 				fprintf(stderr, "error brace not closed\n");
    106 				exit(1);
    107 			}
    108 			i++;
    109 		}
    110 		exp[i-2] = '\0';
    111 		exp = CheckedRealloc(exp, strlen(exp) + 1);
    112 	}else if (input[i] == '"'){
    113 		i++;
    114 		while (input[i] != '"') i++;
    115 	}
    116 
    117 
    118 	i = 0;
    119 	function = CheckedMalloc(strlen(exp));
    120 	while (exp[i] != ' '){
    121 		function[i] = exp[i];
    122 		i++;	
    123 	}
    124 
    125 	function[i] = '\0';
    126 	function = CheckedRealloc(function, i);
    127 
    128 	expressFunction(function, node);
    129 
    130 	char *tok, *saveptr, *expptr = exp;
    131 	
    132 	exp = strstr(exp, " ");
    133 	tok = strtok_r(exp, " ", &saveptr);
    134 
    135 	argCount = 0;
    136 	depth = 0;
    137 	do {
    138 		if (node->args[argCount] != NULL){
    139 			argCount++;
    140 		}
    141 		if (tok[0] != '(' && tok[strlen(tok)-1] != ')' && depth == 0){
    142 			if (node->args[argCount] == NULL){
    143 				node->literalArgs[argCount] = giveType(tok);
    144 			}
    145 			argCount++;
    146 		}
    147 		
    148 		if (tok[0] == '(') depth++;
    149 		if (tok[strlen(tok)-1] == ')') depth--;
    150 		tok = strtok_r(NULL, " ", &saveptr);
    151 	} while (tok != NULL);
    152 
    153 	free(expptr);
    154 
    155 	return node;
    156 }
    157 
    158 void printAst(ast_node *root){
    159 	printf("-----------\n");
    160 	if (root->func->builtInFunc == -1) printf("function: %s\n", root->func->name);
    161 	else printf("function (built in): %d\n", root->func->builtInFunc);
    162 	for (int i = 0; i < MAXARGS + 1; i++){
    163 		if (root->args[i] != NULL) printAst(root->args[i]);
    164 		else {
    165 			if (root->literalArgs[i] != NULL) printf("%s\n", root->literalArgs[i]);
    166 		}
    167 	}
    168 	printf("-----------\n");
    169 }