%option noyywrap

%{
/*************************************************************************
 *  56002.l - 56002 assembly lexical scanner
 *-----------------------------------------------------------------------
 *  Jake Janovetz 1995
 *  University of Illinois
 *************************************************************************
 *
 *  This is the lexical scanner for 56002 assembly code.  It is provided
 *  to be compiled by lex or flex.  A list of tokens must have already
 *  been created by yacc or bison.
 *
 *  By default, the scanner assumes input to be taken from a file.
 *  However, by defining 'LINEASM' (with a compiler switch), the
 *  scanner takes input from a string.  Since lex and flex are not
 *  compatible in their handling of this, read the comments around the
 *  LINEASM definition test.
 ************************************************************************/

#include "parser.h"          /* yacc generated header file */
#include <stdio.h>           /* printf */
#include <stdlib.h>          /* required for atof to work properly */
#include <string.h>          /* strlen, memcpy, strcpy */
#include <ctype.h>           /* character-case helpers */

char *str;                   /* The string which is parsed */
int  lineno = 1;             /* Incremented by every rule that consumes a newline */

int          SearchMnemonic(char *name);
unsigned int HexToInt(char *hex);
unsigned int BinToInt(char *bin);

#ifdef LINEASM
/*
 * The following are for AT&T lex input redirection -- from a string
 */
/*
#undef input
#undef unput
#define input() (*str++)
#define unput(c) (*--str = c)
*/

/*
 * The following are for GNU flex input redirection -- from a string
 */
#undef YY_INPUT
#define YY_INPUT(b, r, ms) (r=str_yyinput(b,ms))
#endif


/*************************************************************************
 *  Function Name:  str_yyinput
 *  Author:         Jake Janovetz
 *  Origin Date:    11/10/95
 *
 *  Inputs:   char *bufr - Character buffer to copy the data to.  Flex
 *               provides this.
 *            int max_size - Number of characters to copy into the buffer.
 *
 *  Outputs:  Number of characters copied.  Zero is returned when the
 *            global input string is NULL or has been fully consumed.
 *
 *  Description:  This function provides a way for flex (NOT lex) to
 *     acquire input from a string.  The YY_INPUT macro is redefined to
 *     call this function.  It simply copies data from the global
 *     input string into flex's string buffer and advances the global
 *     pointer past the characters that were handed out.
 *
 *  Major Modification History:
 *     Date:       Name:       Description:
 *     -----       -----       ------------
 *     11/10/95    Jake J      Creation
 ************************************************************************/
int str_yyinput(char *bufr, int max_size)
{
   int n;

   /* No string installed yet: report end of input instead of crashing. */
   if (str == NULL) {
      return(0);
   }

   n = strlen(str);
   if (max_size < n) {
      n = max_size;
   }

   memcpy(bufr, str, n);
   str += n;

   return(n);
}
%}

ws          [ \t]
HEX         $[0-9a-fA-F]+
BINARY      \%[01]+
LABEL_CHAR  [a-zA-Z0-9_]
nl          \n

%%

{ws}                 ;                          /* munch up white-space */
";".*                ;                          /* ignore comments to end of line */
^{ws}*";".*{nl}      { lineno++; }              /* comment-only line */
^{ws}*{nl}           { lineno++; }              /* blank line */
{nl}                 { lineno++; return(EOL); } /* end-of-line */

[rR][0-7]            { yylval.ival = atoi((char *)&yytext[1]); return(REG_Rn); }
[mM][0-7]            { yylval.ival = atoi((char *)&yytext[1]); return(REG_Mn); }
[nN][0-7]            { yylval.ival = atoi((char *)&yytext[1]); return(REG_Nn); }

[aA]                 { return (REG_A); }
[aA]0                { return (REG_A0); }
[aA]1                { return (REG_A1); }
[aA]2                { return (REG_A2); }
[bB]                 { return (REG_B); }
[bB]0                { return (REG_B0); }
[bB]1                { return (REG_B1); }
[bB]2                { return (REG_B2); }
[xX]                 { return (REG_X); }
[xX]0                { return (REG_X0); }
[xX]1                { return (REG_X1); }
[yY]                 { return (REG_Y); }
[yY]0                { return (REG_Y0); }
[yY]1                { return (REG_Y1); }

[mM][rR]             { return (REG_MR); }
[cC][cC][rR]         { return (REG_CCR); }
[sS][rR]             { return (REG_SR); }
[oO][mM][rR]         { return (REG_OMR); }
[sS][pP]             { return (REG_SP); }
[sS][sS][hH]         { return (REG_SSH); }
[sS][sS][lL]         { return (REG_SSL); }
[lL][aA]             { return (REG_LA); }
[lL][cC]             { return (REG_LC); }

[xX]:                { return (MEM_X); }
[yY]:                { return (MEM_Y); }
[pP]:                { return (MEM_P); }

[0-9]+               { yylval.ival = atoi((char *)yytext); return (INTEGER); }

[0-9]+[eE][+-]?[0-9]+            |
[0-9]+"."[0-9]*[eE][+-]?[0-9]+   |
"."[0-9]*[eE][+-]?[0-9]+         {
                        yylval.fval = atof((char *)yytext);
                        /* NOTE(review): this echoes every float literal to
                         * stdout; it looks like leftover debug output --
                         * confirm before removing. */
                        printf("%s\n", yytext);
                        return (FLOAT);
                     }

{LABEL_CHAR}+        {
                        /* NOTE(review): unbounded copy; yylval.str is
                         * declared in parser.h -- confirm it can hold the
                         * longest identifier the scanner may match. */
                        strcpy(yylval.str, (char *)yytext);
                        return(SearchMnemonic((char *)yytext));
                     }

%{
/* Hex numbers appear as "$aAb932F"
 * and must be converted to an integer.
 * The leading '$' is skipped before conversion.
 */
%}
{HEX}                { yylval.ival = HexToInt((char *)yytext + 1); return(INTEGER); }

%{
/* Binary numbers appear as "%011101010"
 * and must be converted to an integer.
 * The leading '%' is skipped before conversion.
 */
%}
{BINARY}             { yylval.ival = BinToInt((char *)yytext + 1); return(INTEGER); }

.                    { return(yytext[0]); }

%%

/*
 * When a label string is found, the lexer searches through a list of
 * possible assembly mnemonics for a match.  If one is found, the
 * appropriate token is sent to the parser.  Otherwise, the string is
 * sent as a label.  The parser should then do symbol lookup on the label.
 *
 * The following is the table of known mnemonics.
 *
 *    opcode - mnemonic text, lower case; the longest entries
 *             ("debugcc", "illegal") use all 8 bytes including the NUL.
 *    token  - parser token for the mnemonic.
 *    cond   - 0x0-0xf on the conditional forms (debugcc/jcc/jscc/tcc
 *             families), 0 elsewhere; presumably the condition-code
 *             field of the instruction -- confirm against the parser.
 *
 * The table is terminated by an entry with an empty opcode string.
 */
typedef struct {
   char opcode[8];
   int  token;
   int  cond;
} op_token_list;

op_token_list slist[] = {
   {"abs",     OP_ABS,     0},
   {"adc",     OP_ADC,     0},
   {"add",     OP_ADD,     0},
   {"addl",    OP_ADDL,    0},
   {"addr",    OP_ADDR,    0},
   {"and",     OP_AND,     0},
   {"andi",    OP_ANDI,    0},
   {"asl",     OP_ASL,     0},
   {"asr",     OP_ASR,     0},
   {"bchg",    OP_BCHG,    0},
   {"bclr",    OP_BCLR,    0},
   {"bset",    OP_BSET,    0},
   {"btst",    OP_BTST,    0},
   {"clr",     OP_CLR,     0},
   {"cmp",     OP_CMP,     0},
   {"cmpm",    OP_CMPM,    0},
   {"debug",   OP_DEBUG,   0},
   {"debugcc", OP_DEBUGCC, 0x0},
   {"debughs", OP_DEBUGCC, 0x0},
   {"debugge", OP_DEBUGCC, 0x1},
   {"debugne", OP_DEBUGCC, 0x2},
   {"debugpl", OP_DEBUGCC, 0x3},
   {"debugnn", OP_DEBUGCC, 0x4},
   {"debugec", OP_DEBUGCC, 0x5},
   {"debuglc", OP_DEBUGCC, 0x6},
   {"debuggt", OP_DEBUGCC, 0x7},
   {"debugcs", OP_DEBUGCC, 0x8},
   {"debuglo", OP_DEBUGCC, 0x8},
   {"debuglt", OP_DEBUGCC, 0x9},
   {"debugeq", OP_DEBUGCC, 0xa},
   {"debugmi", OP_DEBUGCC, 0xb},
   {"debugnr", OP_DEBUGCC, 0xc},
   {"debuges", OP_DEBUGCC, 0xd},
   {"debugls", OP_DEBUGCC, 0xe},
   {"debugle", OP_DEBUGCC, 0xf},
   {"dec",     OP_DEC,     0},
   {"div",     OP_DIV,     0},
   {"do",      OP_DO,      0},
   {"enddo",   OP_ENDDO,   0},
   {"eor",     OP_EOR,     0},
   {"illegal", OP_ILLEGAL, 0},
   {"inc",     OP_INC,     0},
   {"jcc",     OP_JCC,     0x0},
   {"jhs",     OP_JCC,     0x0},
   {"jge",     OP_JCC,     0x1},
   {"jne",     OP_JCC,     0x2},
   {"jpl",     OP_JCC,     0x3},
   {"jnn",     OP_JCC,     0x4},
   {"jec",     OP_JCC,     0x5},
   {"jlc",     OP_JCC,     0x6},
   {"jgt",     OP_JCC,     0x7},
   {"jcs",     OP_JCC,     0x8},
   {"jlo",     OP_JCC,     0x8},
   {"jlt",     OP_JCC,     0x9},
   {"jeq",     OP_JCC,     0xa},
   {"jmi",     OP_JCC,     0xb},
   {"jnr",     OP_JCC,     0xc},
   {"jes",     OP_JCC,     0xd},
   {"jls",     OP_JCC,     0xe},
   {"jle",     OP_JCC,     0xf},
   {"jscc",    OP_JSCC,    0x0},
   {"jshs",    OP_JSCC,    0x0},
   {"jsge",    OP_JSCC,    0x1},
   {"jsne",    OP_JSCC,    0x2},
   {"jspl",    OP_JSCC,    0x3},
   {"jsnn",    OP_JSCC,    0x4},
   {"jsec",    OP_JSCC,    0x5},
   {"jslc",    OP_JSCC,    0x6},
   {"jsgt",    OP_JSCC,    0x7},
   {"jscs",    OP_JSCC,    0x8},
   {"jslo",    OP_JSCC,    0x8},
   {"jslt",    OP_JSCC,    0x9},
   {"jseq",    OP_JSCC,    0xa},
   {"jsmi",    OP_JSCC,    0xb},
   {"jsnr",    OP_JSCC,    0xc},
   {"jses",    OP_JSCC,    0xd},
   {"jsls",    OP_JSCC,    0xe},
   {"jsle",    OP_JSCC,    0xf},
   {"jclr",    OP_JCLR,    0},
   {"jsclr",   OP_JSCLR,   0},
   {"jset",    OP_JSET,    0},
   {"jsset",   OP_JSSET,   0},
   {"jmp",     OP_JMP,     0},
   {"jsr",     OP_JSR,     0},
   {"lsl",     OP_LSL,     0},
   {"lsr",     OP_LSR,     0},
   {"lua",     OP_LUA,     0},
   {"mac",     OP_MAC,     0},
   {"macr",    OP_MACR,    0},
   {"move",    OP_MOVE,    0},
   {"movec",   OP_MOVE,    0},
   {"movem",   OP_MOVE,    0},
   {"movep",   OP_MOVEP,   0},
   {"mpy",     OP_MPY,     0},
   {"mpyr",    OP_MPYR,    0},
   {"neg",     OP_NEG,     0},
   {"nop",     OP_NOP,     0},
   {"norm",    OP_NORM,    0},
   {"not",     OP_NOT,     0},
   {"or",      OP_OR,      0},
   {"ori",     OP_ORI,     0},
   {"rep",     OP_REP,     0},
   {"reset",   OP_RESET,   0},
   {"rnd",     OP_RND,     0},
   {"rol",     OP_ROL,     0},
   {"ror",     OP_ROR,     0},
   {"rti",     OP_RTI,     0},
   {"rts",     OP_RTS,     0},
   {"sbc",     OP_SBC,     0},
   {"stop",    OP_STOP,    0},
   {"sub",     OP_SUB,     0},
   {"subl",    OP_SUBL,    0},
   {"subr",    OP_SUBR,    0},
   {"swi",     OP_SWI,     0},
   {"tcc",     OP_TCC,     0x0},
   {"ths",     OP_TCC,     0x0},
   {"tge",     OP_TCC,     0x1},
   {"tne",     OP_TCC,     0x2},
   {"tpl",     OP_TCC,     0x3},
   {"tnn",     OP_TCC,     0x4},
   {"tec",     OP_TCC,     0x5},
   {"tlc",     OP_TCC,     0x6},
   {"tgt",     OP_TCC,     0x7},
   {"tcs",     OP_TCC,     0x8},
   {"tlo",     OP_TCC,     0x8},
   {"tlt",     OP_TCC,     0x9},
   {"teq",     OP_TCC,     0xa},
   {"tmi",     OP_TCC,     0xb},
   {"tnr",     OP_TCC,     0xc},
   {"tes",     OP_TCC,     0xd},
   {"tls",     OP_TCC,     0xe},
   {"tle",     OP_TCC,     0xf},
   {"tfr",     OP_TFR,     0},
   {"tst",     OP_TST,     0},
   {"wait",    OP_WAIT,    0},
   {"",        0,          0}
};

/************************************************************************* * Function Name: HexToInt * * Author: Jake Janovetz * * Origin Date: 9/20/95 * * * * Inputs: char *hex - Character string containing hex digits to be * * converted to an integer. * * * * Outputs: unsigned int representation of the hex number. * * * * Description: Provides a simple conversion from a string of hex digits * * to an integer. This is necessary for the lexer to pass on an * * integer to the parser. * * * * Major Modification History: * * Date: Name: Description: * * ----- ----- ------------ * * 09/20/95 Jake J Creation * ************************************************************************/ unsigned int HexToInt(char *hex) { static char hexdigits[] = "0123456789abcdef"; char digit; int j; unsigned int result; result = 0; while (*hex != '\0') { digit = tolower(*hex++); result <<= 4; for (j=0; j<15; j++) { if (digit == hexdigits[j]) break; } result += j; } return(result); } /************************************************************************* * Function Name: BinToInt * * Author: Jake Janovetz * * Origin Date: 9/20/95 * * * * Inputs: char *bin - String of binary characters to be converted to * * an integer. * * * * Outputs: unsigned int representation of the binary number. * * * * Description: Provides a simple conversion from a string of binary * * digits to an integer. This is necessary for the lexer to pass on * * an integer to the parser. 
* * * * Major Modification History: * * Date: Name: Description: * * ----- ----- ------------ * * 09/20/95 Jake J Creation * ************************************************************************/ unsigned int BinToInt(char *bin) { unsigned int result; result = 0; while (*bin != '\0') { result <<= 1; result += (*bin++=='0')?(0):(1); } return(result); } /************************************************************************* * Function Name: SearchMnemonic * * Author: Jake Janovetz * * Origin Date: 9/10/95 * * * * Inputs: char *name - Text string that the lexer recognized as either * * a label or a mnemonic. * * * * Outputs: Token value to return to the lexer which, in turn, is * * returned to the parser. * * * * Description: When the lexer recognizes a string as either a label or * * a mnemonic, it sends the string to this function. This function * * searches through the list of assembly mnemonics for a match. If * * a match is found, it returns the opcode token. Otherwise, it * * decides that the string is a label and returns the LABEL token. * * * * Major Modification History: * * Date: Name: Description: * * ----- ----- ------------ * * 09/10/95 Jake J Creation * ************************************************************************/ int SearchMnemonic(char *name) { int i; for (i=0; i