|
Table of Content | Chapter Sixteen (Part 12) |
Although the UCR Standard Library pattern matching routines would probably not be appropriate for writing a full lexical analyzer or compiler, they are useful for writing small compilers/assemblers or programs where speed of compilation/assembly is of little concern. One good example is the simple nonsymbolic assembler appearing in the SIM886 simulator for an earlier version of the x86 processors. This "mini-assembler" accepts an x86 assembly language statement and immediately assembles it into memory. This allows SIM886 users to create simple assembly language programs within the SIM886 monitor/debugger. Using the Standard Library pattern matching routines makes it very easy to implement such an assembler.
The grammar for this miniassembler is:

Stmt     → Grp1 reg "," operand |
           Grp2 reg "," reg "," constant |
           Grp3 operand |
           goto operand |
           halt
Grp1     → load | store | add | sub
Grp2     → ifeq | iflt | ifgt
Grp3     → get | put
reg      → ax | bx | cx | dx
operand  → reg | constant | [bx] | constant [bx]
constant → hexdigit constant | hexdigit
hexdigit → 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f
There are some minor semantic details that the program handles (such as disallowing stores into immediate operands). The assembly code for the miniassembler follows:
; ASM.ASM
;
.xlist
include stdlib.a
matchfuncs
includelib stdlib.lib
.list
dseg            segment para public 'data'

; Sample statements that Main feeds to Assemble, one at a time.
; Each is a zero-terminated string.  Str1-Str10 are accepted by the
; assembler; Str11-Str14 are rejected with a syntax error message.

Str1            byte    "load ax, 0",0
Str2            byte    "load ax, bx",0
Str3            byte    "load ax, ax",0
Str4            byte    "add ax, 15",0
Str5            byte    "sub ax, [bx]",0
Str6            byte    "store bx, [1000]",0
Str7            byte    "load bx, 2000[bx]",0
Str8            byte    "goto 3000",0
Str9            byte    "iflt ax, bx, 100",0
Str10           byte    "halt",0
Str11           byte    "This is illegal",0
Str12           byte    "load ax, store",0
Str13           byte    "store ax, 1000",0
Str14           byte    "ifeq ax, 0, 0",0
; Variables used by the assembler.
; The match functions fill these in while a statement is being recognized;
; Assemble then reads them to build the output byte(s).
;
; AsmConst  - 16-bit value of the hex constant most recently converted by
;             ConstPat.
; AsmOpcode - base opcode stored by the TryXXXX mnemonic functions
;             (0=LOAD 1=STORE 2=ADD 3=SUB 4=IFEQ 5=IFLT 6=IFGT 7=special).
; AsmOprnd1 - register code 0..3 (AX/BX/CX/DX) stored by MatchReg, or, when
;             AsmOpcode is 7, the special sub-opcode
;             (0=HALT 1=GOTO 2=GET 3=PUT).
; AsmOprnd2 - second register code 0..3, or the addressing-mode code stored
;             by MatchGen (4=[bx] 5=xxxx[bx] 6=[xxxx] 7=immediate).
AsmConst word 0
AsmOpcode byte 0
AsmOprnd1 byte 0
AsmOprnd2 byte 0
include stdsets.a ;Bring in the standard char sets.
; Patterns for the assembler.
;
; Each initializer lists, in order, the UCR Standard Library pattern
; fields {MatchFunction, MatchParm, MatchAlt, NextPattern}.  A field left
; empty between commas (or omitted at the end) keeps its default value.
;
; Pattern is (
;       (load|store|add|sub) reg "," operand |
;       (ifeq|iflt|ifgt) reg1 "," reg2 "," const |
;       (get|put) operand |
;       goto operand |
;       halt
;  )
;
; With a few semantic additions (e.g., cannot store to a const).

; Skip leading white space, then start with the group one instructions.

InstrPat        pattern {spancset, WhiteSpace, Grp1, Grp1}

; Group one: try each mnemonic in turn (Grp1Strs chain).  If one matches,
; continue with the operands; if none does, fall back to group two.

Grp1            pattern {sl_Match2, Grp1Strs, Grp2, Grp1Oprnds}

Grp1Strs        pattern {TryLoad,,Grp1Store}
Grp1Store       pattern {TryStore,,Grp1Add}
Grp1Add         pattern {TryAdd,,Grp1Sub}
Grp1Sub         pattern {TrySub}

; Patterns for the LOAD, STORE, ADD, and SUB instructions.
; The mnemonic strings are upper case because Assemble converts the
; statement with strupr before matching.

LoadPat         pattern {MatchStr, LoadInstr2}
LoadInstr2      byte    "LOAD",0

StorePat        pattern {MatchStr, StoreInstr2}
StoreInstr2     byte    "STORE",0

AddPat          pattern {MatchStr, AddInstr2}
AddInstr2       byte    "ADD",0

SubPat          pattern {MatchStr, SubInstr2}
SubInstr2       byte    "SUB",0
; Patterns for the group one (LOAD/STORE/ADD/SUB) instruction operands:
;
;       <ws> reg <ws> "," <ws> operand <ws> <end of string>
;
; Initializer fields are {MatchFunction, MatchParm, MatchAlt, NextPattern}.

Grp1Oprnds      pattern {spancset, WhiteSpace, Grp1Reg, Grp1Reg}

; First operand: a register; MatchReg stores its code into AsmOprnd1.

Grp1Reg         pattern {MatchReg, AsmOprnd1,,Grp1ws2}
Grp1ws2         pattern {spancset, WhiteSpace, Grp1Comma, Grp1Comma}
Grp1Comma       pattern {MatchChar, ',', 0, Grp1ws3}
Grp1ws3         pattern {spancset, WhiteSpace, Grp1Op2, Grp1Op2}

; Second operand: any addressing mode (see MatchGen), then end of line.

Grp1Op2         pattern {MatchGen,,,EndOfLine}

; EndOfLine- optional trailing white space followed by the end of the string.

EndOfLine       pattern {spancset, WhiteSpace, NullChar, NullChar}
NullChar        pattern {EOS}

; Register form of the second operand (used by MatchGen); MatchReg stores
; the register code into AsmOprnd2.

Grp1Op2Reg      pattern {MatchReg, AsmOprnd2}
; Patterns for the group two instructions (IFEQ, IFLT, IFGT):
;
;       mnemonic <ws> reg <ws> "," <ws> reg <ws> "," <ws> const <end>
;
; Initializer fields are {MatchFunction, MatchParm, MatchAlt, NextPattern}.
; If no group two mnemonic matches, the alternate is group three.

Grp2            pattern {sl_Match2, Grp2Strs, Grp3, Grp2Oprnds}

Grp2Strs        pattern {TryIFEQ,,Grp2IFLT}
Grp2IFLT        pattern {TryIFLT,,Grp2IFGT}
Grp2IFGT        pattern {TryIFGT}

; Operands: first register goes to AsmOprnd1, second to AsmOprnd2, and
; the trailing hex constant is converted by ConstPat.

Grp2Oprnds      pattern {spancset, WhiteSpace, Grp2Reg, Grp2Reg}
Grp2Reg         pattern {MatchReg, AsmOprnd1,,Grp2ws2}
Grp2ws2         pattern {spancset, WhiteSpace, Grp2Comma, Grp2Comma}
Grp2Comma       pattern {MatchChar, ',', 0, Grp2ws3}
Grp2ws3         pattern {spancset, WhiteSpace, Grp2Reg2, Grp2Reg2}
Grp2Reg2        pattern {MatchReg, AsmOprnd2,,Grp2ws4}
Grp2ws4         pattern {spancset, WhiteSpace, Grp2Comma2, Grp2Comma2}
Grp2Comma2      pattern {MatchChar, ',', 0, Grp2ws5}
Grp2ws5         pattern {spancset, WhiteSpace, Grp2Op3, Grp2Op3}
Grp2Op3         pattern {ConstPat,,,EndOfLine}

; Patterns for the IFEQ, IFLT, and IFGT instructions.

IFEQPat         pattern {MatchStr, IFEQInstr2}
IFEQInstr2      byte    "IFEQ",0

IFLTPat         pattern {MatchStr, IFLTInstr2}
IFLTInstr2      byte    "IFLT",0

IFGTPat         pattern {MatchStr, IFGTInstr2}
IFGTInstr2      byte    "IFGT",0
; Grp3 Patterns: GET, PUT, and GOTO, each followed by a single operand.
; Initializer fields are {MatchFunction, MatchParm, MatchAlt, NextPattern}.
; If no group three mnemonic matches, the alternate is group four (HALT).

Grp3            pattern {sl_Match2, Grp3Strs, Grp4, Grp3Oprnds}

Grp3Strs        pattern {TryGET,,Grp3Put}
Grp3Put         pattern {TryPut,,Grp3Goto}
Grp3Goto        pattern {TryGOTO}

; Patterns for the GET, PUT, and GOTO instructions.

GetPat          pattern {MatchStr, GetInstr2}
GetInstr2       byte    "GET",0

PutPat          pattern {MatchStr, PutInstr2}
PutInstr2       byte    "PUT",0

GOTOPat         pattern {MatchStr, GOTOInstr2}
GOTOInstr2      byte    "GOTO",0

; Patterns for the group three (PUT/GET/GOTO) instruction operands:
; white space, one general operand (see MatchGen), then end of line.

Grp3Oprnds      pattern {spancset, WhiteSpace, Grp3Op, Grp3Op}
Grp3Op          pattern {MatchGen,,,EndOfLine}

; Patterns for the group four instruction (HALT): the mnemonic must be
; followed directly by the end of the line.

Grp4            pattern {TryHalt,,,EndOfLine}

HaltPat         pattern {MatchStr, HaltInstr2}
HaltInstr2      byte    "HALT",0
; Patterns to match the four non-register addressing modes.
; Initializer fields are {MatchFunction, MatchParm, MatchAlt, NextPattern}.

; "[BX]"

BXIndrctPat     pattern {MatchStr, BXIndrctStr}
BXIndrctStr     byte    "[BX]",0

; "xxxx[BX]"  - a hex constant immediately followed by "[BX]".

BXIndexedPat    pattern {ConstPat,,,BXIndrctPat}

; "[xxxx]"    - a hex constant enclosed in brackets.

DirectPat       pattern {MatchChar, '[',,DP2}
DP2             pattern {ConstPat,,,DP3}
DP3             pattern {MatchChar, ']'}

; "xxxx"      - a bare hex constant.

ImmediatePat    pattern {ConstPat}

; Pattern to match a hex constant: a run of characters from the
; xdigits set.

HexConstPat     pattern {spancset, xdigits}

dseg            ends
cseg            segment para public 'code'
                assume  cs:cseg, ds:dseg

; store- Writes What into the variable Where regardless of the current
; DS value.  The macro points DS at Where's segment, performs the
; move, and then restores DS and AX.
;
;       Where   Memory variable to write.
;       What    Value to store.  It must not depend on AX or DS, because
;               both are overwritten before the move executes.
;
; Only push/pop/mov are used, so the flags are left untouched.

store           macro   Where, What
                push    ds
                push    ax
                mov     ax, seg Where
                mov     ds, ax
                mov     Where, What
                pop     ax
                pop     ds
                endm
; Pattern matching routines for the assembler.
; Each mnemonic has its own corresponding matching function that
; attempts to match the mnemonic.  If it does, it initializes the
; AsmOpcode variable with the base opcode of the instruction.

; TryLoad- Compare against the "LOAD" string.
; Runs LoadPat through match2; on a match (carry set) stores opcode 0
; into AsmOpcode.  DX and SI are preserved.  The store macro does not
; alter the flags, so the carry returned by match2 is passed back as is.

TryLoad         proc    far
                push    dx
                push    si
                ldxi    LoadPat
                match2
                jnc     NoTLMatch
                store   AsmOpcode, 0            ;Initialize base opcode.
NoTLMatch:      pop     si
                pop     dx
                ret
TryLoad         endp
; TryStore- Compare against the "STORE" string.
; Runs StorePat through match2; on a match (carry set) stores opcode 1
; into AsmOpcode.  DX and SI are preserved and the carry from match2 is
; returned unchanged.

TryStore        proc    far
                push    dx
                push    si
                ldxi    StorePat
                match2
                jnc     NoTSMatch
                store   AsmOpcode, 1            ;Initialize base opcode.
NoTSMatch:      pop     si
                pop     dx
                ret
TryStore        endp
; TryAdd- Compare against the "ADD" string.
; Runs AddPat through match2; on a match (carry set) stores opcode 2
; into AsmOpcode.  DX and SI are preserved and the carry from match2 is
; returned unchanged.

TryAdd          proc    far
                push    dx
                push    si
                ldxi    AddPat
                match2
                jnc     NoTAMatch
                store   AsmOpcode, 2            ;Initialize ADD opcode.
NoTAMatch:      pop     si
                pop     dx
                ret
TryAdd          endp
; TrySub- Compare against the "SUB" string.
; Runs SubPat through match2; on a match (carry set) stores opcode 3
; into AsmOpcode.  DX and SI are preserved and the carry from match2 is
; returned unchanged.

TrySub          proc    far
                push    dx
                push    si
                ldxi    SubPat
                match2
                jnc     NoTMMatch
                store   AsmOpcode, 3            ;Initialize SUB opcode.
NoTMMatch:      pop     si
                pop     dx
                ret
TrySub          endp
; TryIFEQ- Compare against the "IFEQ" string.
; Runs IFEQPat through match2; on a match (carry set) stores opcode 4
; into AsmOpcode.  DX and SI are preserved and the carry from match2 is
; returned unchanged.

TryIFEQ         proc    far
                push    dx
                push    si
                ldxi    IFEQPat
                match2
                jnc     NoIEMatch
                store   AsmOpcode, 4            ;Initialize IFEQ opcode.
NoIEMatch:      pop     si
                pop     dx
                ret
TryIFEQ         endp
; TryIFLT- Compare against the "IFLT" string.
; Runs IFLTPat through match2; on a match (carry set) stores opcode 5
; into AsmOpcode.  DX and SI are preserved and the carry from match2 is
; returned unchanged.

TryIFLT         proc    far
                push    dx
                push    si
                ldxi    IFLTPat
                match2
                jnc     NoILMatch
                store   AsmOpcode, 5            ;Initialize IFLT opcode.
NoILMatch:      pop     si
                pop     dx
                ret
TryIFLT         endp
; TryIFGT- Compare against the "IFGT" string.
; Runs IFGTPat through match2; on a match (carry set) stores opcode 6
; into AsmOpcode.  DX and SI are preserved and the carry from match2 is
; returned unchanged.

TryIFGT         proc    far
                push    dx
                push    si
                ldxi    IFGTPat
                match2
                jnc     NoIGMatch
                store   AsmOpcode, 6            ;Initialize IFGT opcode.
NoIGMatch:      pop     si
                pop     dx
                ret
TryIFGT         endp
; TryGET- Compare against the "GET" string.
; Runs GetPat through match2; on a match (carry set) stores the
; "special" opcode 7 into AsmOpcode and GET's sub-opcode 2 into
; AsmOprnd1.  DX and SI are preserved and the carry from match2 is
; returned unchanged.

TryGET          proc    far
                push    dx
                push    si
                ldxi    GetPat
                match2
                jnc     NoGMatch
                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 2            ;GET's Special opcode.
NoGMatch:       pop     si
                pop     dx
                ret
TryGET          endp
; TryPut- Compare against the "PUT" string.
; Runs PutPat through match2; on a match (carry set) stores the
; "special" opcode 7 into AsmOpcode and PUT's sub-opcode 3 into
; AsmOprnd1.  DX and SI are preserved and the carry from match2 is
; returned unchanged.

TryPut          proc    far
                push    dx
                push    si
                ldxi    PutPat
                match2
                jnc     NoPMatch
                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 3            ;PUT's Special opcode.
NoPMatch:       pop     si
                pop     dx
                ret
TryPut          endp
; TryGOTO- Compare against the "GOTO" string.
; Runs GOTOPat through match2; on a match (carry set) stores the
; "special" opcode 7 into AsmOpcode and GOTO's sub-opcode 1 into
; AsmOprnd1.  DX and SI are preserved and the carry from match2 is
; returned unchanged.
;
; The exit label has its own name (TryGET uses NoGMatch) so the file
; also assembles when labels are not scoped to their procedure.

TryGOTO         proc    far
                push    dx
                push    si
                ldxi    GOTOPat
                match2
                jnc     NoGotoMatch
                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 1            ;GOTO's Special opcode.
NoGotoMatch:    pop     si
                pop     dx
                ret
TryGOTO         endp
; TryHalt- Compare against the "HALT" string.
; Runs HaltPat through match2; on a match (carry set) stores the
; "special" opcode 7 into AsmOpcode and zeros both operand fields
; (HALT takes no operands, so no other routine will set them).
; DX and SI are preserved and the carry from match2 is returned unchanged.

TryHalt         proc    far
                push    dx
                push    si
                ldxi    HaltPat
                match2
                jnc     NoHMatch
                store   AsmOpcode, 7            ;Initialize Special opcode.
                store   AsmOprnd1, 0            ;Halt's special opcode.
                store   AsmOprnd2, 0
NoHMatch:       pop     si
                pop     dx
                ret
TryHalt         endp
; MatchReg checks to see if we've got a valid register value.
;
; On entry:
;       DS:SI   points at the byte that receives the register code
;               (0, 1, 2, or 3 for AX, BX, CX, or DX).
;       ES:DI   points at the string containing (hopefully) the register
;               operand.
;       CX      points at the last location plus one we can check in the
;               string.
;
; On return:
;       Carry=1 for success; the register code has been stored at DS:SI
;               and ES:AX points just beyond the two register characters.
;       Carry=0 for failure; AX=DI and nothing has been stored.
;
; The length check is done first so that the routine never looks at
; characters beyond CX and never stores a code for a rejected operand.

MatchReg        proc    far

; A register name is two characters; make sure both lie inside the
; region we are allowed to examine.

                lea     ax, 2[di]               ;Position just past "rX".
                cmp     ax, cx                  ;Be sure we won't go
                ja      BadReg                  ; too far.

; ES:DI points at two characters which should be AX/BX/CX/DX.  Anything
; else is an error.

                cmp     byte ptr es:1[di], 'X'  ;Everyone needs this
                jne     BadReg

                mov     al, es:[di]
                sub     al, 'A'                 ;'A'..'D' -> 0..3
                cmp     al, 3                   ;Unsigned compare also rejects
                ja      BadReg                  ; characters below 'A'.

                mov     ds:[si], al             ;Save register opcode.
                lea     ax, 2[di]               ;Skip past register.
                stc
                ret

BadReg:         mov     ax, di                  ;Nothing consumed.
                clc
                ret
MatchReg        endp
; MatchGen- Matches a general addressing mode.  Stuffs the appropriate
;           addressing mode code into AsmOprnd2.  If a 16-bit constant
;           is required by this addressing mode, this code shoves that
;           into the AsmConst variable (via ConstPat).
;
; The forms are tried in this order, and the first that matches wins:
;
;       register        code 0..3 (stored by MatchReg)
;       [bx]            code 4
;       xxxx[bx]        code 5
;       [xxxx]          code 6
;       xxxx            code 7
;
; Returns with the carry flag from the last match2 call: set if one of
; the forms matched, clear if none did.  DX and SI are preserved.

MatchGen        proc    far
                push    dx
                push    si

; Try a register operand.

                ldxi    Grp1Op2Reg
                match2
                jc      MGDone

; Try "[bx]".

                ldxi    BXIndrctPat
                match2
                jnc     TryBXIndexed
                store   AsmOprnd2, 4
                jmp     MGDone

; Look for an operand of the form "xxxx[bx]".

TryBXIndexed:
                ldxi    BXIndexedPat
                match2
                jnc     TryDirect
                store   AsmOprnd2, 5
                jmp     MGDone

; Try a direct address operand "[xxxx]".

TryDirect:
                ldxi    DirectPat
                match2
                jnc     TryImmediate
                store   AsmOprnd2, 6
                jmp     MGDone

; Look for an immediate operand "xxxx".

TryImmediate:
                ldxi    ImmediatePat
                match2
                jnc     MGDone
                store   AsmOprnd2, 7

MGDone:
                pop     si
                pop     dx
                ret
MatchGen        endp
; ConstPat- Matches a 16-bit hex constant.  If it matches, it converts
;           the string to an integer and stores it into AsmConst.
;
; Runs HexConstPat through match2.  On a match, AX (the value returned
; by match2) is saved around the atoh call and restored afterwards, and
; the routine returns with carry set.  On failure it returns with the
; carry clear as left by match2.  DX, SI, and DS are preserved.
;
; NOTE(review): HexConstPat is built on spancset; if spancset accepts a
; run of zero characters, an empty operand is treated as a constant
; here -- confirm against the library documentation.

ConstPat        proc    far
                push    dx
                push    si
                ldxi    HexConstPat
                match2
                jnc     CPDone

                push    ds
                push    ax                      ;Keep match2's result.
                mov     ax, seg AsmConst
                mov     ds, ax
                atoh                            ;Convert the text at ES:DI.
                mov     AsmConst, ax
                pop     ax
                pop     ds
                stc                             ;Report success.

CPDone:         pop     si
                pop     dx
                ret
ConstPat        endp
; Assemble- This code assembles the instruction that ES:DI points
;           at and displays the hex opcode(s) for that instruction.
;
; On entry ES:DI points at a zero-terminated statement and DS must
; address DSEG (the Asm* variables are read directly).  The statement is
; converted to upper case in place before it is echoed and matched.
;
; Output is either the opcode byte, followed by the two bytes of the
; 16-bit constant (low byte first) when the instruction has one, or one
; of the syntax error messages.

Assemble        proc    near

; Print out the instruction we're about to assemble.

                print
                byte    "Assembling: ",0
                strupr
                puts
                putcr

; Assemble the instruction:

                ldxi    InstrPat
                xor     cx, cx
                match
                jnc     SyntaxError

; Quick check for illegal instructions:
;   STORE and GET may not use an immediate operand;
;   GOTO must use one.

                cmp     AsmOpcode, 7            ;Special/Get instr.
                jne     TryStoreInstr
                cmp     AsmOprnd1, 2            ;GET opcode
                je      SeeIfImm
                cmp     AsmOprnd1, 1            ;Goto opcode
                je      IsGOTO

TryStoreInstr:  cmp     AsmOpcode, 1            ;Store Instruction
                jne     InstrOkay

SeeIfImm:       cmp     AsmOprnd2, 7            ;Immediate Adrs Mode
                jne     InstrOkay
                print
                db      "Syntax error: store/get immediate not allowed."
                db      " Try Again",cr,lf,0
                jmp     ASMDone

IsGOTO:         cmp     AsmOprnd2, 7            ;Immediate mode for GOTO
                je      InstrOkay
                print
                db      "Syntax error: GOTO only allows immediate "
                byte    "mode.",cr,lf
                db      0
                jmp     ASMDone

; Merge the opcode and operand fields together in the instruction byte,
; then output the opcode byte:
;
;       AL = (AsmOpcode shl 5) or (AsmOprnd1 shl 3) or AsmOprnd2

InstrOkay:      mov     al, AsmOpcode
                shl     al, 1
                shl     al, 1
                or      al, AsmOprnd1
                shl     al, 1
                shl     al, 1
                shl     al, 1
                or      al, AsmOprnd2
                puth

; IFEQ/IFLT/IFGT (opcodes 4..6) always carry a constant.  Any other
; instruction carries one only for operand codes 5..7.

                cmp     AsmOpcode, 4            ;IFEQ instruction
                jb      SimpleInstr
                cmp     AsmOpcode, 6            ;IFGT instruction
                jbe     PutConstant

SimpleInstr:    cmp     AsmOprnd2, 5
                jb      ASMDone

; If this instruction has a 16 bit operand, output it here.

PutConstant:    mov     al, ' '
                putc
                mov     ax, AsmConst
                puth                            ;Low byte.
                mov     al, ' '                 ;AH still holds the high byte.
                putc
                xchg    al, ah
                puth                            ;High byte.
                jmp     ASMDone

SyntaxError:    print
                db      "Syntax error in instruction."
                db      cr,lf,0

ASMDone:        putcr
                ret
Assemble        endp
; Main program that tests the assembler.
; Points DS and ES at DSEG, initializes the library's memory manager,
; and then runs each sample statement (Str1..Str14) through Assemble.

Main            proc
                mov     ax, seg dseg            ;Set up the segment registers
                mov     ds, ax
                mov     es, ax

                meminit

                lesi    Str1
                call    Assemble
                lesi    Str2
                call    Assemble
                lesi    Str3
                call    Assemble
                lesi    Str4
                call    Assemble
                lesi    Str5
                call    Assemble
                lesi    Str6
                call    Assemble
                lesi    Str7
                call    Assemble
                lesi    Str8
                call    Assemble
                lesi    Str9
                call    Assemble
                lesi    Str10
                call    Assemble
                lesi    Str11
                call    Assemble
                lesi    Str12
                call    Assemble
                lesi    Str13
                call    Assemble
                lesi    Str14
                call    Assemble

Quit:           ExitPgm
Main            endp
cseg            ends
; Stack segment: 256 copies of the six-byte string "stack " (1536 bytes).
sseg segment para stack 'stack'
stk db 256 dup ("stack ")
sseg ends
; NOTE(review): zzzzzzseg/LastBytes presumably mark the end of the program
; image for the library's memory manager (see meminit in Main) -- confirm
; against the Standard Library documentation.
zzzzzzseg segment para public 'zzzzzz'
LastBytes db 16 dup (?)
zzzzzzseg ends
; Execution begins at Main.
end Main
Sample Output:

Assembling: LOAD AX, 0
07 00 00
Assembling: LOAD AX, BX
01
Assembling: LOAD AX, AX
00
Assembling: ADD AX, 15
47 15 00
Assembling: SUB AX, [BX]
64
Assembling: STORE BX, [1000]
2E 00 10
Assembling: LOAD BX, 2000[BX]
0D 00 20
Assembling: GOTO 3000
EF 00 30
Assembling: IFLT AX, BX, 100
A1 00 01
Assembling: HALT
E0
Assembling: THIS IS ILLEGAL
Syntax error in instruction.
Assembling: LOAD AX, STORE
Syntax error in instruction.
Assembling: STORE AX, 1000
Syntax error: store/get immediate not allowed. Try Again
Assembling: IFEQ AX, 0, 0
Syntax error in instruction.
|
Table of Content | Chapter Sixteen (Part 12) |
Chapter Sixteen: Pattern Matching
(Part 11)
29 SEP 1996