123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385 |
- package glisp
- import (
- "bytes"
- "errors"
- "io"
- "regexp"
- "strconv"
- "unicode/utf8"
- )
// TokenType classifies a lexeme produced by the Lexer.
type TokenType int

// Token categories. Punctuation tokens carry no text; literal tokens
// (symbol, bool, numbers, char, string) carry their text in Token.str.
// The iota ordering is relied upon by code that switches on the type,
// so new types should be appended, not inserted.
const (
	TokenLParen   TokenType = iota // "("
	TokenRParen                    // ")"
	TokenLSquare                   // "["
	TokenRSquare                   // "]"
	TokenLCurly                    // "{"
	TokenRCurly                    // "}"
	TokenDot                       // "." (used in dotted pairs)
	TokenQuote                     // "'"
	TokenBacktick                  // "`"
	TokenTilde                     // "~"
	TokenTildeAt                   // "~@"
	TokenSymbol                    // bare identifier
	TokenBool                      // "true" or "false"
	TokenDecimal                   // base-10 integer literal
	TokenHex                       // hex literal; str holds digits without "0x"
	TokenOct                       // octal literal; str holds digits without "0o"
	TokenBinary                    // binary literal; str holds digits without "0b"
	TokenFloat                     // floating-point literal
	TokenChar                      // character literal; str holds the decoded char
	TokenString                    // string literal; str holds the decoded contents
	TokenEnd                       // end of input / no more tokens
)
// Token is a single lexeme: its category plus any associated literal
// text. Punctuation tokens leave str empty.
type Token struct {
	typ TokenType
	str string
}
- func (t Token) String() string {
- switch t.typ {
- case TokenLParen:
- return "("
- case TokenRParen:
- return ")"
- case TokenLSquare:
- return "["
- case TokenRSquare:
- return "]"
- case TokenLCurly:
- return "{"
- case TokenRCurly:
- return "}"
- case TokenDot:
- return "."
- case TokenQuote:
- return "'"
- case TokenBacktick:
- return "`"
- case TokenTilde:
- return "~"
- case TokenTildeAt:
- return "~@"
- case TokenHex:
- return "0x" + t.str
- case TokenOct:
- return "0o" + t.str
- case TokenBinary:
- return "0b" + t.str
- case TokenChar:
- quoted := strconv.Quote(t.str)
- return "#" + quoted[1:len(quoted)-1]
- }
- return t.str
- }
// LexerState tracks which lexing mode the Lexer is currently in.
type LexerState int

const (
	LexerNormal     LexerState = iota // default: reading atoms and punctuation
	LexerComment                      // inside a ';' comment, until newline
	LexerStrLit                       // inside a double-quoted string literal
	LexerStrEscaped                   // just saw '\' inside a string literal
	LexerUnquote                      // just saw '~'; next rune decides ~ vs ~@
)
// Lexer incrementally turns a stream of runes into Tokens. Runes are
// pulled from the stream only as needed by PeekNextToken/GetNextToken.
type Lexer struct {
	state    LexerState    // current lexing mode
	tokens   []Token       // tokens lexed but not yet consumed
	buffer   *bytes.Buffer // accumulates the atom or string being read
	stream   io.RuneReader // source of input runes
	linenum  int           // 1-based; incremented on '\n' outside strings
	finished bool          // set once the stream reports an error (EOF)
}
// Atom-classification patterns, tried in order by DecodeAtom.
// FloatRegex groups the whole alternation so that the ^ and $ anchors
// apply to every alternative; previously ^ bound only to the first
// alternative and $ only to the last, so strings like "a.5" or "x1e5"
// were misclassified as floats.
var (
	BoolRegex    = regexp.MustCompile("^(true|false)$")
	DecimalRegex = regexp.MustCompile("^-?[0-9]+$")
	HexRegex     = regexp.MustCompile("^0x[0-9a-fA-F]+$")
	OctRegex     = regexp.MustCompile("^0o[0-7]+$")
	BinaryRegex  = regexp.MustCompile("^0b[01]+$")
	SymbolRegex  = regexp.MustCompile("^[^'#]+$")
	CharRegex    = regexp.MustCompile("^#\\\\?.$")
	FloatRegex   = regexp.MustCompile("^-?(([0-9]+\\.[0-9]*)|(\\.[0-9]+)|([0-9]+(\\.[0-9]*)?[eE](-?[0-9]+)))$")
)
// StringToRunes decodes str's UTF-8 bytes into a slice of runes.
// Invalid byte sequences decode to utf8.RuneError, matching the
// behavior of Go's built-in string-to-rune iteration.
func StringToRunes(str string) []rune {
	// Pre-size to the exact rune count to avoid growth copies, and use
	// the idiomatic range-over-string instead of manual DecodeRune calls.
	runes := make([]rune, 0, utf8.RuneCountInString(str))
	for _, r := range str {
		runes = append(runes, r)
	}
	return runes
}
// EscapeChar maps the character following a backslash to the rune it
// denotes. Unknown escapes yield a space plus a non-nil error.
func EscapeChar(char rune) (rune, error) {
	escapes := map[rune]rune{
		'n':  '\n',
		'r':  '\r',
		'a':  '\a',
		't':  '\t',
		'\\': '\\',
		'"':  '"',
		'\'': '\'',
		'#':  '#',
	}
	if decoded, ok := escapes[char]; ok {
		return decoded, nil
	}
	return ' ', errors.New("invalid escape sequence")
}
- func DecodeChar(atom string) (string, error) {
- runes := StringToRunes(atom)
- if len(runes) == 3 {
- char, err := EscapeChar(runes[2])
- return string(char), err
- }
- if len(runes) == 2 {
- return string(runes[1:2]), nil
- }
- return "", errors.New("not a char literal")
- }
- func DecodeAtom(atom string) (Token, error) {
- if atom == "." {
- return Token{TokenDot, ""}, nil
- }
- if BoolRegex.MatchString(atom) {
- return Token{TokenBool, atom}, nil
- }
- if DecimalRegex.MatchString(atom) {
- return Token{TokenDecimal, atom}, nil
- }
- if HexRegex.MatchString(atom) {
- return Token{TokenHex, atom[2:]}, nil
- }
- if OctRegex.MatchString(atom) {
- return Token{TokenOct, atom[2:]}, nil
- }
- if BinaryRegex.MatchString(atom) {
- return Token{TokenBinary, atom[2:]}, nil
- }
- if FloatRegex.MatchString(atom) {
- return Token{TokenFloat, atom}, nil
- }
- if SymbolRegex.MatchString(atom) {
- return Token{TokenSymbol, atom}, nil
- }
- if CharRegex.MatchString(atom) {
- char, err := DecodeChar(atom)
- if err != nil {
- return Token{}, err
- }
- return Token{TokenChar, char}, nil
- }
- return Token{}, errors.New("Unrecognized atom")
- }
- func (lexer *Lexer) dumpBuffer() error {
- if lexer.buffer.Len() <= 0 {
- return nil
- }
- tok, err := DecodeAtom(lexer.buffer.String())
- if err != nil {
- return err
- }
- lexer.buffer.Reset()
- lexer.tokens = append(lexer.tokens, tok)
- return nil
- }
- func (lexer *Lexer) dumpString() {
- str := lexer.buffer.String()
- lexer.buffer.Reset()
- lexer.tokens = append(lexer.tokens, Token{TokenString, str})
- }
- func DecodeBrace(brace rune) Token {
- switch brace {
- case '(':
- return Token{TokenLParen, ""}
- case ')':
- return Token{TokenRParen, ""}
- case '[':
- return Token{TokenLSquare, ""}
- case ']':
- return Token{TokenRSquare, ""}
- case '{':
- return Token{TokenLCurly, ""}
- case '}':
- return Token{TokenRCurly, ""}
- }
- return Token{TokenEnd, ""}
- }
- func (lexer *Lexer) LexNextRune(r rune) error {
- if lexer.state == LexerComment {
- if r == '\n' {
- lexer.state = LexerNormal
- }
- return nil
- }
- if lexer.state == LexerStrLit {
- if r == '\\' {
- lexer.state = LexerStrEscaped
- return nil
- }
- if r == '"' {
- lexer.dumpString()
- lexer.state = LexerNormal
- return nil
- }
- lexer.buffer.WriteRune(r)
- return nil
- }
- if lexer.state == LexerStrEscaped {
- char, err := EscapeChar(r)
- if err != nil {
- return err
- }
- lexer.buffer.WriteRune(char)
- lexer.state = LexerStrLit
- return nil
- }
- if lexer.state == LexerUnquote {
- if r == '@' {
- lexer.tokens = append(
- lexer.tokens, Token{TokenTildeAt, ""})
- } else {
- lexer.tokens = append(
- lexer.tokens, Token{TokenTilde, ""})
- lexer.buffer.WriteRune(r)
- }
- lexer.state = LexerNormal
- return nil
- }
- if r == '"' {
- if lexer.buffer.Len() > 0 {
- return errors.New("Unexpected quote")
- }
- lexer.state = LexerStrLit
- return nil
- }
- if r == ';' {
- lexer.state = LexerComment
- return nil
- }
- if r == '\'' {
- if lexer.buffer.Len() > 0 {
- return errors.New("Unexpected quote")
- }
- lexer.tokens = append(lexer.tokens, Token{TokenQuote, ""})
- return nil
- }
- if r == '`' {
- if lexer.buffer.Len() > 0 {
- return errors.New("Unexpected backtick")
- }
- lexer.tokens = append(lexer.tokens, Token{TokenBacktick, ""})
- return nil
- }
- if r == '~' {
- if lexer.buffer.Len() > 0 {
- return errors.New("Unexpected tilde")
- }
- lexer.state = LexerUnquote
- return nil
- }
- if r == '(' || r == ')' || r == '[' || r == ']' || r == '{' || r == '}' {
- err := lexer.dumpBuffer()
- if err != nil {
- return err
- }
- lexer.tokens = append(lexer.tokens, DecodeBrace(r))
- return nil
- }
- if r == ' ' || r == '\n' || r == '\t' || r == '\r' {
- if r == '\n' {
- lexer.linenum++
- }
- err := lexer.dumpBuffer()
- if err != nil {
- return err
- }
- return nil
- }
- _, err := lexer.buffer.WriteRune(r)
- if err != nil {
- return err
- }
- return nil
- }
- func (lexer *Lexer) PeekNextToken() (Token, error) {
- if lexer.finished {
- return Token{TokenEnd, ""}, nil
- }
- for len(lexer.tokens) == 0 {
- r, _, err := lexer.stream.ReadRune()
- if err != nil {
- lexer.finished = true
- if lexer.buffer.Len() > 0 {
- lexer.dumpBuffer()
- return lexer.tokens[0], nil
- }
- return Token{TokenEnd, ""}, nil
- }
- err = lexer.LexNextRune(r)
- if err != nil {
- return Token{TokenEnd, ""}, err
- }
- }
- tok := lexer.tokens[0]
- return tok, nil
- }
- func (lexer *Lexer) GetNextToken() (Token, error) {
- tok, err := lexer.PeekNextToken()
- if err != nil || tok.typ == TokenEnd {
- return Token{TokenEnd, ""}, err
- }
- lexer.tokens = lexer.tokens[1:]
- return tok, nil
- }
- func NewLexerFromStream(stream io.RuneReader) *Lexer {
- return &Lexer{
- tokens: make([]Token, 0, 10),
- buffer: new(bytes.Buffer),
- state: LexerNormal,
- stream: stream,
- linenum: 1,
- finished: false,
- }
- }
// Linenum reports the current line number (1-based; incremented for
// each '\n' seen outside comments and string literals).
func (lexer *Lexer) Linenum() int {
	return lexer.linenum
}
|