123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305 |
- package lexer
- import (
- "fmt"
- "strings"
- "unicode"
- "unicode/utf8"
- )
// Pos is a byte offset into the input string being lexed.
type Pos int

// ItemType identifies the kind of token carried by an Item.
type ItemType int

// Item is a single token produced by the lexer.
type Item struct {
	Type  ItemType // kind of token
	Pos   Pos      // byte offset in the input at which the token starts
	Value string   // token text, or the error message when Type is ItemError
}

// Token kinds. ItemError is deliberately the zero value of ItemType.
const (
	ItemError ItemType = iota // lexing failed; Value holds the message
	ItemEOF                   // end of input reached

	// Delimiters.
	ItemLeftParen  // '('
	ItemRightParen // ')'
	ItemLeftVect   // '['
	ItemRightVect  // ']'

	// Atoms.
	ItemIdent
	ItemString
	ItemChar
	ItemFloat
	ItemInt
	ItemComplex

	// Reader-macro kinds.
	ItemQuote
	ItemQuasiQuote
	ItemUnquote
	ItemUnquoteSplice
)

// EOF is the sentinel returned in place of a rune when the input is exhausted.
const EOF = -1
- type stateFn func(*Lexer) stateFn
- type Lexer struct {
- name string
- input string
- state stateFn
- pos Pos
- start Pos
- width Pos
- lastPos Pos
- items chan Item
- parenDepth int
- vectDepth int
- }
- // next returns the next rune in the input.
- func (l *Lexer) next() rune {
- if int(l.pos) >= len(l.input) {
- l.width = 0
- return EOF
- }
- r, w := utf8.DecodeRuneInString(l.input[l.pos:])
- l.width = Pos(w)
- l.pos += l.width
- return r
- }
- // peek returns but does not consume the next rune in the input.
- func (l *Lexer) peek() rune {
- r := l.next()
- l.backup()
- return r
- }
- // backup steps back one rune. Can only be called once per call of next.
- func (l *Lexer) backup() {
- l.pos -= l.width
- }
- // emit passes an Item back to the client.
- func (l *Lexer) emit(t ItemType) {
- l.items <- Item{t, l.start, l.input[l.start:l.pos]}
- l.start = l.pos
- }
- func (l *Lexer) ignore() {
- l.start = l.pos
- }
- // accept consumes the next rune if it's from the valid set.
- func (l *Lexer) accept(valid string) bool {
- if strings.IndexRune(valid, l.next()) >= 0 {
- return true
- }
- l.backup()
- return false
- }
- // acceptRun consumes a run of runes from the valid set.
- func (l *Lexer) acceptRun(valid string) {
- for strings.IndexRune(valid, l.next()) >= 0 {
- }
- l.backup()
- }
- func (l *Lexer) errorf(format string, args ...interface{}) stateFn {
- l.items <- Item{ItemError, l.start, fmt.Sprintf(format, args...)}
- return nil
- }
- func (l *Lexer) NextItem() Item {
- item := <-l.items
- l.lastPos = item.Pos
- return item
- }
- func Lex(name, input string) *Lexer {
- l := &Lexer{
- name: name,
- input: input,
- items: make(chan Item),
- }
- go l.run()
- return l
- }
- func (l *Lexer) run() {
- for l.state = lexWhitespace; l.state != nil; {
- l.state = l.state(l)
- }
- close(l.items)
- }
- func lexLeftVect(l *Lexer) stateFn {
- l.emit(ItemLeftVect)
- return lexWhitespace
- }
- func lexRightVect(l *Lexer) stateFn {
- l.emit(ItemRightVect)
- return lexWhitespace
- }
- // lexes an open parenthesis
- func lexLeftParen(l *Lexer) stateFn {
- l.emit(ItemLeftParen)
- return lexWhitespace
- }
- func lexWhitespace(l *Lexer) stateFn {
- for r := l.next(); isSpace(r) || r == '\n'; l.next() {
- r = l.peek()
- }
- l.backup()
- l.ignore()
- switch r := l.next(); {
- case r == EOF:
- l.emit(ItemEOF)
- return nil
- case r == '(':
- return lexLeftParen
- case r == ')':
- return lexRightParen
- case r == '[':
- return lexLeftVect
- case r == ']':
- return lexRightVect
- case r == '"':
- return lexString
- case r == '+' || r == '-' || ('0' <= r && r <= '9'):
- return lexNumber
- case r == ';':
- return lexComment
- case isAlphaNumeric(r):
- return lexIdentifier
- default:
- panic(fmt.Sprintf("don't know what to do with: %q", r))
- }
- }
- func lexString(l *Lexer) stateFn {
- Loop:
- for {
- switch l.next() {
- case '\\':
- if r := l.next(); r != EOF {
- break
- }
- fallthrough
- case EOF:
- return l.errorf("unterminated quoted string")
- case '"':
- break Loop
- }
- }
- l.emit(ItemString)
- return lexWhitespace
- }
- func lexIdentifier(l *Lexer) stateFn {
- Loop:
- for {
- switch r := l.next(); {
- case isAlphaNumeric(r):
- // absorb it!
- default:
- l.backup()
- break Loop
- }
- }
- l.emit(ItemIdent)
- return lexWhitespace
- }
- // lex a close parenthesis
- func lexRightParen(l *Lexer) stateFn {
- l.emit(ItemRightParen)
- return lexWhitespace
- }
- // lex a comment, comment delimiter is known to be already read
- func lexComment(l *Lexer) stateFn {
- i := strings.Index(l.input[l.pos:], "\n")
- l.pos += Pos(i)
- l.ignore()
- return lexWhitespace
- }
- func lexNumber(l *Lexer) stateFn {
- if !l.scanNumber() {
- return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
- }
- if sign := l.peek(); sign == '+' || sign == '-' {
- // Complex: 1+2i. No spaces, must end in 'i'.
- if !l.scanNumber() || l.input[l.pos-1] != 'i' {
- return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
- }
- l.emit(ItemComplex)
- } else if strings.ContainsRune(l.input[l.start:l.pos], '.') {
- l.emit(ItemFloat)
- } else {
- l.emit(ItemInt)
- }
- return lexWhitespace
- }
- func (l *Lexer) scanNumber() bool {
- // Optional leading sign.
- l.accept("+-")
- // Is it hex?
- digits := "0123456789"
- if l.accept("0") && l.accept("xX") {
- digits = "0123456789abcdefABCDEF"
- }
- l.acceptRun(digits)
- if l.accept(".") {
- l.acceptRun(digits)
- }
- if l.accept("eE") {
- l.accept("+-")
- l.acceptRun("0123456789")
- }
- // Is it imaginary?
- l.accept("i")
- // Next thing mustn't be alphanumeric.
- if r := l.peek(); isAlphaNumeric(r) {
- l.next()
- return false
- }
- return true
- }
// isSpace reports whether r is a horizontal blank: a space or a tab.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
// isEndOfLine reports whether r is a line terminator: a carriage return or
// a line feed.
func isEndOfLine(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	default:
		return false
	}
}
// isAlphaNumeric reports whether r may appear in an identifier: a Unicode
// letter or digit, or one of the punctuation runes '-', ':' and '/'.
// Underscore is not accepted.
func isAlphaNumeric(r rune) bool {
	switch r {
	case '-', ':', '/':
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
// debug writes msg followed by a newline to standard output.
func debug(msg string) {
	fmt.Printf("%s\n", msg)
}
|