/*
** dfa.s: lex implementation in assembler (gnu gas - at&t syntax)
**
** Target: 32-bit x86 Linux, system calls through int $0x80.
**
** known keywords: #ifdef, #endif, #define, #undef
** (matched case-insensitively: yy_ec maps upper and lower case letters
** to the same equivalence class)
**
** Every recognised keyword is replaced on stdout by the text "KEYWORD";
** all other input is echoed unchanged.
**
** to compile: as --gstabs dfa.s -o dfa.o; ld dfa.o -o dfa
**             (on an x86-64 host add `--32' to as and `-m elf_i386' to ld)
**
** Test file: copy & paste the following text on a terminal, or
**            any other text you prefer. (the sourced file must be
**            called `test', and must reside within the same
**            dir the program is being executed)
** ---------------------------------------------------
cat > test << "EOF"
#define some thing
#ifdef OTHER
echo some text
#endif
#undef ALGO
EOF
** ---------------------------------------------------
*/

/* Linux i386 system call numbers and related constants */
        .equ    SYS_EXIT,  1
        .equ    SYS_READ,  3
        .equ    SYS_WRITE, 4
        .equ    SYS_OPEN,  5
        .equ    STDOUT,    1

/* Scanner constants (taken from the tables below) */
        .equ    YY_STOP_BASE,       27  /* yy_base value that ends the match loop */
        .equ    YY_META_FROM_STATE, 28  /* states >= this remap the class via yy_meta */
        .equ    YY_ACT_ECHO,        5   /* action: echo the matched text */

        .data

strok:
        .asciz  "KEYWORD"
strok_len = . - strok - 1               /* do not write the trailing NUL */

/*
** equivalence class table.
** The first 128 entries describe 7 bit ascii; the remaining 128 entries
** map every byte >= 0x80 to class 1 (the class used for ordinary
** characters) so that any input byte is a valid index.
*/
yy_ec:
        /*                 Oct  Dec  Hex  Char                        */
        /*                 ===  ===  ===  =========================== */
        .byte   0       /* 000    0  00  NUL '\0'                    */
        .byte   1       /* 001    1  01  SOH (start of heading)      */
        .byte   1       /* 002    2  02  STX (start of text)         */
        .byte   1       /* 003    3  03  ETX (end of text)           */
        .byte   1       /* 004    4  04  EOT (end of transmission)   */
        .byte   1       /* 005    5  05  ENQ (enquiry)               */
        .byte   1       /* 006    6  06  ACK (acknowledge)           */
        .byte   1       /* 007    7  07  BEL '\a' (bell)             */
        .byte   1       /* 010    8  08  BS  '\b' (backspace)        */
        .byte   1       /* 011    9  09  HT  '\t' (horizontal tab)   */
        .byte   1       /* 012   10  0A  LF  '\n' (new line)         */
        .byte   1       /* 013   11  0B  VT  '\v' (vertical tab)     */
        .byte   1       /* 014   12  0C  FF  '\f' (form feed)        */
        .byte   1       /* 015   13  0D  CR  '\r' (carriage ret)     */
        .byte   1       /* 016   14  0E  SO  (shift out)             */
        .byte   1       /* 017   15  0F  SI  (shift in)              */
        .byte   1       /* 020   16  10  DLE (data link escape)      */
        .byte   1       /* 021   17  11  DC1 (device control 1)      */
        .byte   1       /* 022   18  12  DC2 (device control 2)      */
        .byte   1       /* 023   19  13  DC3 (device control 3)      */
        .byte   1       /* 024   20  14  DC4 (device control 4)      */
        .byte   1       /* 025   21  15  NAK (negative ack.)         */
        .byte   1       /* 026   22  16  SYN (synchronous idle)      */
        .byte   1       /* 027   23  17  ETB (end of trans. blk)     */
        .byte   1       /* 030   24  18  CAN (cancel)                */
        .byte   1       /* 031   25  19  EM  (end of medium)         */
        .byte   1       /* 032   26  1A  SUB (substitute)            */
        .byte   1       /* 033   27  1B  ESC (escape)                */
        .byte   1       /* 034   28  1C  FS  (file separator)        */
        .byte   1       /* 035   29  1D  GS  (group separator)       */
        .byte   1       /* 036   30  1E  RS  (record separator)      */
        .byte   1       /* 037   31  1F  US  (unit separator)        */
        .byte   1       /* 040   32  20  SPACE                       */
        .byte   1       /* 041   33  21  !                           */
        .byte   1       /* 042   34  22  "                           */
        .byte   2       /* 043   35  23  #                           */
        .byte   1       /* 044   36  24  $                           */
        .byte   1       /* 045   37  25  %                           */
        .byte   1       /* 046   38  26  &                           */
        .byte   1       /* 047   39  27  '                           */
        .byte   1       /* 050   40  28  (                           */
        .byte   1       /* 051   41  29  )                           */
        .byte   1       /* 052   42  2A  *                           */
        .byte   1       /* 053   43  2B  +                           */
        .byte   1       /* 054   44  2C  ,                           */
        .byte   1       /* 055   45  2D  -                           */
        .byte   1       /* 056   46  2E  .                           */
        .byte   1       /* 057   47  2F  /                           */
        .byte   1       /* 060   48  30  0                           */
        .byte   1       /* 061   49  31  1                           */
        .byte   1       /* 062   50  32  2                           */
        .byte   1       /* 063   51  33  3                           */
        .byte   1       /* 064   52  34  4                           */
        .byte   1       /* 065   53  35  5                           */
        .byte   1       /* 066   54  36  6                           */
        .byte   1       /* 067   55  37  7                           */
        .byte   1       /* 070   56  38  8                           */
        .byte   1       /* 071   57  39  9                           */
        .byte   1       /* 072   58  3A  :                           */
        .byte   1       /* 073   59  3B  ;                           */
        .byte   1       /* 074   60  3C  <                           */
        .byte   1       /* 075   61  3D  =                           */
        .byte   1       /* 076   62  3E  >                           */
        .byte   1       /* 077   63  3F  ?                           */
        .byte   1       /* 100   64  40  @                           */
        .byte   1       /* 101   65  41  A                           */
        .byte   1       /* 102   66  42  B                           */
        .byte   1       /* 103   67  43  C                           */
        .byte   3       /* 104   68  44  D                           */
        .byte   4       /* 105   69  45  E                           */
        .byte   5       /* 106   70  46  F                           */
        .byte   1       /* 107   71  47  G                           */
        .byte   1       /* 110   72  48  H                           */
        .byte   6       /* 111   73  49  I                           */
        .byte   1       /* 112   74  4A  J                           */
        .byte   1       /* 113   75  4B  K                           */
        .byte   1       /* 114   76  4C  L                           */
        .byte   1       /* 115   77  4D  M                           */
        .byte   7       /* 116   78  4E  N                           */
        .byte   1       /* 117   79  4F  O                           */
        .byte   1       /* 120   80  50  P                           */
        .byte   1       /* 121   81  51  Q                           */
        .byte   1       /* 122   82  52  R                           */
        .byte   1       /* 123   83  53  S                           */
        .byte   1       /* 124   84  54  T                           */
        .byte   8       /* 125   85  55  U                           */
        .byte   1       /* 126   86  56  V                           */
        .byte   1       /* 127   87  57  W                           */
        .byte   1       /* 130   88  58  X                           */
        .byte   1       /* 131   89  59  Y                           */
        .byte   1       /* 132   90  5A  Z                           */
        .byte   1       /* 133   91  5B  [                           */
        .byte   1       /* 134   92  5C  \                           */
        .byte   1       /* 135   93  5D  ]                           */
        .byte   1       /* 136   94  5E  ^                           */
        .byte   1       /* 137   95  5F  _                           */
        .byte   1       /* 140   96  60  `                           */
        .byte   1       /* 141   97  61  a                           */
        .byte   1       /* 142   98  62  b                           */
        .byte   1       /* 143   99  63  c                           */
        .byte   3       /* 144  100  64  d                           */
        .byte   4       /* 145  101  65  e                           */
        .byte   5       /* 146  102  66  f                           */
        .byte   1       /* 147  103  67  g                           */
        .byte   1       /* 150  104  68  h                           */
        .byte   6       /* 151  105  69  i                           */
        .byte   1       /* 152  106  6A  j                           */
        .byte   1       /* 153  107  6B  k                           */
        .byte   1       /* 154  108  6C  l                           */
        .byte   1       /* 155  109  6D  m                           */
        .byte   7       /* 156  110  6E  n                           */
        .byte   1       /* 157  111  6F  o                           */
        .byte   1       /* 160  112  70  p                           */
        .byte   1       /* 161  113  71  q                           */
        .byte   1       /* 162  114  72  r                           */
        .byte   1       /* 163  115  73  s                           */
        .byte   1       /* 164  116  74  t                           */
        .byte   8       /* 165  117  75  u                           */
        .byte   1       /* 166  118  76  v                           */
        .byte   1       /* 167  119  77  w                           */
        .byte   1       /* 170  120  78  x                           */
        .byte   1       /* 171  121  79  y                           */
        .byte   1       /* 172  122  7A  z                           */
        .byte   1       /* 173  123  7B  {                           */
        .byte   1       /* 174  124  7C  |                           */
        .byte   1       /* 175  125  7D  }                           */
        .byte   1       /* 176  126  7E  ~                           */
        .byte   1       /* 177  127  7F  DEL                         */
        .fill   128, 1, 1       /* 0x80-0xFF: class 1, like any ordinary char */

/* action number for each state; 0 means "not an accepting state" */
yy_accept:
        .byte    0,  0,  0,  6,  5,  5,  0,  0,  0,  0
        .byte    0,  0,  0,  0,  0,  0,  0,  0,  0,  0
        .byte    0,  0,  0,  4,  3,  2,  1,  0

/* class remapping applied once the default chain reaches YY_META_FROM_STATE */
yy_meta:
        .byte    0,  1,  1,  1,  1,  1,  1,  1,  1

/* per-state offset into yy_nxt / yy_chk */
yy_base:
        .byte    0, 24, 23, 24, 27,  0, 19, 15, 16, 13
        .byte   14, 15, 14, 13,  9,  8,  9,  8,  4,  5
        .byte    4,  2,  1, 27, 27, 27, 27, 27,  0

/* per-state fallback state, used when the yy_chk test fails */
yy_def:
        .byte    0, 28, 28, 27, 27, 27, 27, 27, 27, 27
        .byte   27, 27, 27, 27, 27, 27, 27, 27, 27, 27
        .byte   27, 27, 27, 27, 27, 27, 27,  0, 27

/* next state, indexed by yy_base[state] + class */
yy_nxt:
        .byte    0,  4, 27,  6,  7, 26,  8, 25,  9, 24
        .byte   23, 22, 21, 20, 19, 18, 17, 16, 15, 14
        .byte   13, 12, 11, 10, 27,  5,  5,  3, 27, 27
        .byte   27, 27, 27, 27, 27, 27

/* owner state of each yy_nxt slot; a mismatch means "follow yy_def" */
yy_chk:
        .byte    0, 28,  0,  5,  5, 22,  5, 21,  5, 20
        .byte   19, 18, 17, 16, 15, 14, 13, 12, 11, 10
        .byte    9,  8,  7,  6,  3,  2,  1, 27, 27, 27
        .byte   27, 27, 27, 27, 27, 27

        .equ    buffer_len, 256
        .equ    read_max, buffer_len - 1        /* keep the last byte as NUL */

        .bss
        .comm   buffer, buffer_len
        .comm   last_accepting_state, 4 /* last accepting state */
        .comm   last_accepting_mpos, 4  /* last accepting address */
        .comm   yy_cp, 4                /* Pointer to current buffer's position */
        .comm   yy_bp, 4                /* Pointer to the beginning of the current run */
        .comm   yy_act, 4               /* action */
        .comm   yy_len, 4               /* temp len */

        .section .rodata
testfile:
        .asciz  "test"

        .text

/*
** _start: program entry point.
**
** Opens `test' read-only, reads at most buffer_len - 1 bytes into
** buffer (buffer lives in .bss, so the byte after the data is always
** NUL), runs lex over it and exits.
** Exit status: 0 on success, 1 if open or read fails.
*/
        .globl  _start
        .type   _start, @function
_start:
        movl    $SYS_OPEN, %eax
        movl    $testfile, %ebx
        xorl    %ecx, %ecx              /* flags = 0 (O_RDONLY) */
        xorl    %edx, %edx              /* mode, unused without O_CREAT */
        int     $0x80
        testl   %eax, %eax
        js      .start_fail             /* negative result is -errno */

        movl    %eax, %ebx              /* fd */
        movl    $SYS_READ, %eax
        movl    $buffer, %ecx
        movl    $read_max, %edx
        int     $0x80
        testl   %eax, %eax
        js      .start_fail

        pushl   %eax                    /* len */
        pushl   $buffer                 /* buf */
        call    lex
        addl    $8, %esp                /* drop both arguments */

        xorl    %ebx, %ebx              /* exit status 0 */
        jmp     .start_exit

.start_fail:
        movl    $1, %ebx                /* exit status 1 */

.start_exit:
        movl    $SYS_EXIT, %eax
        int     $0x80
        .size   _start, . - _start

/*
** lex(buf, len) -- stack arguments, caller removes them
**
** Scans the NUL-terminated text at buf with the DFA tables above.
** Actions 1-4 write "KEYWORD" to stdout, action 5 writes the matched
** text itself; after either one scanning restarts at the current
** position.  Any other action, or a NUL input byte, ends the scan.
**
** In:    8(%ebp)  = buf
**        12(%ebp) = len; accepted but not read, the scan stops at the
**                   first NUL byte
** Out:   no defined return value
** Saves: %ebx, %esi (callee-saved in the i386 calling convention)
** Clobb: %eax, %ecx, %edx, flags
**
** Register roles:
**   %eax: pointer to the current input position
**   %ebx: current char, zero-extended
**   %ecx: equivalence class of the current char
**   %edx: temp (table index / action number)
**   %esi: current machine's state
*/
        .globl  lex
        .type   lex, @function
lex:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %ebx
        pushl   %esi

        movl    8(%ebp), %eax           /* eax = buf */

.lex_init:
        movl    $1, %esi                /* start state */
        movl    %eax, yy_cp
        movl    %eax, yy_bp             /* a new run begins here */

/*
** Match code
*/
.lex_do:
        movzbl  (%eax), %ebx            /* full-width load: no stale upper bits */
        testl   %ebx, %ebx
        jz      .function_end           /* NUL ends the input */
        movzbl  yy_ec(,%ebx,1), %ecx    /* ecx = class of the char */

        cmpb    $0, yy_accept(,%esi,1)
        je      .lex_while
        movl    %esi, last_accepting_state      /* store last accepting state */
        movl    %eax, last_accepting_mpos       /* store address */

.lex_while:
        /* while (yy_chk[yy_base[state] + class] != state) follow yy_def */
        movzbl  yy_base(,%esi,1), %edx
        addl    %ecx, %edx
        movzbl  yy_chk(,%edx,1), %edx
        cmpl    %edx, %esi
        je      .lex_while_end
        movzbl  yy_def(,%esi,1), %esi
        cmpl    $YY_META_FROM_STATE, %esi
        jb      .lex_while
        movzbl  yy_meta(,%ecx,1), %ecx
        jmp     .lex_while

.lex_while_end:
        movzbl  yy_base(,%esi,1), %edx
        addl    %ecx, %edx
        movzbl  yy_nxt(,%edx,1), %esi   /* state = yy_nxt[base + class] */
        incl    %eax                    /* advance the whole pointer, not just %al */
        movl    %eax, yy_cp
        cmpb    $YY_STOP_BASE, yy_base(,%esi,1)
        jne     .lex_do

/*
** Find action code
*/
.find_action:
        movzbl  yy_accept(,%esi,1), %edx
        movl    %edx, yy_act
        testl   %edx, %edx
        jnz     .find_action_continue

        /* not accepting: back up to the last accepting state/position */
        movl    last_accepting_mpos, %eax
        movl    %eax, yy_cp
        movl    last_accepting_state, %esi
        movzbl  yy_accept(,%esi,1), %edx
        movl    %edx, yy_act

.find_action_continue:
        /* YY_DO_BEFORE_ACTION: yy_len = yy_cp - yy_bp */
        movl    yy_cp, %ecx
        subl    yy_bp, %ecx
        movl    %ecx, yy_len

/*
** Do action code
*/
.do_action:
        cmpl    $YY_ACT_ECHO, %edx
        je      .do_echo
        ja      .function_end           /* actions above 5 end the scan */
        testl   %edx, %edx
        /*
        ** Action 0 here means the backup above found no accepting
        ** state either.  Backing up again would restore the very same
        ** state and spin forever, so stop instead.
        */
        jz      .function_end

        /* actions 1-4: a keyword was recognised */
.do_ok:
        movl    $strok, %ecx
        movl    $strok_len, %edx
        jmp     .lex_write

.do_echo:
        movl    yy_bp, %ecx             /* start of the matched text */
        movl    yy_len, %edx

.lex_write:
        movl    $SYS_WRITE, %eax
        movl    $STDOUT, %ebx
        int     $0x80                   /* result is not checked */
        movl    yy_cp, %eax             /* resume scanning after the match */
        jmp     .lex_init

.function_end:
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
        .size   lex, . - lex