gisp.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
  1. package main
import (
	"bufio"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
)
// tokenType identifies the lexical class of a scanned token.
type tokenType int

// Token classes. The iota order is load-bearing: _INVALID must be the
// zero value, and the parser switches on these constants.
const (
	_INVALID tokenType = iota // lexing error; the token's val holds the message
	_EOF                      // end of input
	_INT                      // integer literal
	_SYMBOL                   // identifier / symbol
	_LPAREN                   // '('
	_RPAREN                   // ')'
	_STRING                   // double-quoted string literal (quotes included in val)
	_FLOAT                    // floating-point literal
	_BOOL                     // "#t" or "#f"
	_QUOTE                    // ' prefix
	_QUASIQUOTE               // ` prefix
	_UNQUOTE                  // , prefix
	_UNQUOTESPLICE            // ,@ prefix
)
  28. func (t tokenType) String() string {
  29. switch t {
  30. case _INVALID:
  31. return "INVALID TOKEN"
  32. case _EOF:
  33. return "EOF"
  34. case _INT:
  35. return "INT"
  36. case _SYMBOL:
  37. return "SYMBOL"
  38. case _LPAREN:
  39. return "LEFT_PAREN"
  40. case _RPAREN:
  41. return "RIGHT_PAREN"
  42. case _STRING:
  43. return "STRING"
  44. case _FLOAT:
  45. return "FLOAT"
  46. case _BOOL:
  47. return "BOOL"
  48. case _QUOTE:
  49. return "'"
  50. case _QUASIQUOTE:
  51. return "`"
  52. case _UNQUOTE:
  53. return ","
  54. case _UNQUOTESPLICE:
  55. return ",@"
  56. default:
  57. return "WTF!?"
  58. }
  59. }
// token is a single lexeme produced by the lexer.
type token struct {
	typ tokenType // The type of this item.
	pos Pos       // The starting position, in bytes, of this item in the input string.
	val string    // The value of this item.
}
  65. func (t token) String() string {
  66. return fmt.Sprintf("%s", t.val)
  67. }
// eof is the sentinel rune returned by next when the input is exhausted.
const eof = -1

// stateFn is one state of the lexer; it returns the next state to run,
// or nil to stop the machine.
type stateFn func(*lexer) stateFn

// Pos is a byte offset into the lexer's input string.
type Pos int
// lexer holds the scanning state and streams tokens over a channel
// while its state machine runs in a separate goroutine (see lex).
type lexer struct {
	name       string     // name of the input; unused so far (see lex)
	input      string     // the string being scanned
	state      stateFn    // next lexing function to enter
	pos        Pos        // current position in input
	start      Pos        // start of the token currently being scanned
	width      Pos        // width of the last rune read; 0 after eof
	lastPos    Pos        // position of the last token returned by nextToken
	tokens     chan token // channel on which scanned tokens are delivered
	parenDepth int        // current '(' nesting depth
}
  82. func (l *lexer) run() {
  83. for l.state = lexWhitespace; l.state != nil; {
  84. l.state = l.state(l)
  85. }
  86. }
// next returns the next rune in the input and advances past it. At the
// end of input it returns eof and records width 0, which makes a
// subsequent backup a no-op.
func (l *lexer) next() rune {
	if int(l.pos) >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = Pos(w)
	l.pos += l.width
	return r
}
// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
}
// emit sends the token of type t spanning input[start:pos] to the
// client and advances start past it.
func (l *lexer) emit(t tokenType) {
	l.tokens <- token{t, l.start, l.input[l.start:l.pos]}
	l.start = l.pos
}
// ignore discards the input scanned so far (start catches up to pos).
func (l *lexer) ignore() {
	l.start = l.pos
}
  114. // accept consumes the next rune if it's from the valid set.
  115. func (l *lexer) accept(valid string) bool {
  116. if strings.IndexRune(valid, l.next()) >= 0 {
  117. return true
  118. }
  119. l.backup()
  120. return false
  121. }
  122. // acceptRun consumes a run of runes from the valid set.
  123. func (l *lexer) acceptRun(valid string) {
  124. for strings.IndexRune(valid, l.next()) >= 0 {
  125. }
  126. l.backup()
  127. }
  128. func (l *lexer) lineNumber() int {
  129. return 1 + strings.Count(l.input[:l.lastPos], "\n")
  130. }
// errorf emits an _INVALID token whose val carries the formatted error
// message, then returns nil to terminate the state machine.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.tokens <- token{_INVALID, l.start, fmt.Sprintf(format, args...)}
	return nil
}
  135. func (l *lexer) nextToken() token {
  136. token := <-l.tokens
  137. l.lastPos = token.pos
  138. return token
  139. }
  140. // lexes an open parenthesis
  141. func lexOpenParen(l *lexer) stateFn {
  142. l.emit(_LPAREN)
  143. l.parenDepth++
  144. r := l.next()
  145. switch r {
  146. case ' ', '\t', '\n', '\r':
  147. return lexWhitespace
  148. case '\'':
  149. return lexQuote
  150. case '`':
  151. return lexQuasiquote
  152. case ',':
  153. return lexUnquote
  154. case '(':
  155. return lexOpenParen
  156. case ')':
  157. return lexCloseParen
  158. case ';':
  159. return lexComment
  160. case '#':
  161. return lexBool
  162. }
  163. if unicode.IsDigit(r) {
  164. return lexInt
  165. }
  166. return lexSymbol
  167. }
  168. func lexBool(l *lexer) stateFn {
  169. l.accept("tf")
  170. l.emit(_BOOL)
  171. r := l.next()
  172. switch r {
  173. case ' ', '\t', '\n':
  174. return lexWhitespace
  175. case ')':
  176. return lexCloseParen
  177. case ';':
  178. return lexComment
  179. }
  180. return l.errorf("unexpected tokens")
  181. }
  182. func lexQuote(l *lexer) stateFn {
  183. l.acceptRun(" ")
  184. l.ignore()
  185. l.emit(_QUOTE)
  186. r := l.next()
  187. switch r {
  188. case '"':
  189. return lexString
  190. case '(':
  191. return lexOpenParen
  192. case ')':
  193. return lexCloseParen
  194. case '#':
  195. return lexBool
  196. case '\'':
  197. return lexQuote
  198. case '`':
  199. return lexQuasiquote
  200. case ',':
  201. return lexUnquote
  202. }
  203. if unicode.IsDigit(r) {
  204. return lexInt
  205. }
  206. return lexSymbol
  207. }
  208. func lexQuasiquote(l *lexer) stateFn {
  209. l.acceptRun(" ")
  210. l.ignore()
  211. l.emit(_QUASIQUOTE)
  212. r := l.next()
  213. switch r {
  214. case '"':
  215. return lexString
  216. case '(':
  217. return lexOpenParen
  218. case ')':
  219. return lexCloseParen
  220. case '#':
  221. return lexBool
  222. case '\'':
  223. return lexQuote
  224. case '`':
  225. return lexQuasiquote
  226. case ',':
  227. return lexUnquote
  228. }
  229. if unicode.IsDigit(r) {
  230. return lexInt
  231. }
  232. return lexSymbol
  233. }
  234. func lexUnquote(l *lexer) stateFn {
  235. if l.peek() == '@' {
  236. return lexUnquoteSplice
  237. }
  238. l.acceptRun(" ")
  239. l.ignore()
  240. l.emit(_UNQUOTE)
  241. r := l.next()
  242. switch r {
  243. case '"':
  244. return lexString
  245. case '(':
  246. return lexOpenParen
  247. case ')':
  248. return lexCloseParen
  249. case '#':
  250. return lexBool
  251. case '\'':
  252. return lexQuote
  253. case '`':
  254. return lexQuasiquote
  255. case ',':
  256. return lexUnquote
  257. }
  258. if unicode.IsDigit(r) {
  259. return lexInt
  260. }
  261. return lexSymbol
  262. }
  263. func lexUnquoteSplice(l *lexer) stateFn {
  264. r := l.next()
  265. l.acceptRun(" ")
  266. l.ignore()
  267. l.emit(_UNQUOTESPLICE)
  268. r = l.next()
  269. switch r {
  270. case '"':
  271. return lexString
  272. case '(':
  273. return lexOpenParen
  274. case ')':
  275. return lexCloseParen
  276. case '#':
  277. return lexBool
  278. case '\'':
  279. return lexQuote
  280. case '`':
  281. return lexQuasiquote
  282. case ',':
  283. return lexUnquote
  284. }
  285. if unicode.IsDigit(r) {
  286. return lexInt
  287. }
  288. return lexSymbol
  289. }
  290. func lexWhitespace(l *lexer) stateFn {
  291. l.ignore()
  292. r := l.next()
  293. switch r {
  294. case ' ', '\t', '\n':
  295. return lexWhitespace
  296. case '\'':
  297. return lexQuote
  298. case '`':
  299. return lexQuasiquote
  300. case ',':
  301. return lexUnquote
  302. case '"':
  303. return lexString
  304. case '(':
  305. return lexOpenParen
  306. case ')':
  307. return lexCloseParen
  308. case ';':
  309. return lexComment
  310. case '#':
  311. return lexBool
  312. case eof:
  313. if l.parenDepth > 0 {
  314. return l.errorf("unclosed paren")
  315. }
  316. l.emit(_EOF)
  317. return nil
  318. }
  319. if unicode.IsDigit(r) {
  320. return lexInt
  321. }
  322. return lexSymbol
  323. }
  324. func lexString(l *lexer) stateFn {
  325. r := l.next()
  326. switch r {
  327. case '"':
  328. l.emit(_STRING)
  329. return lexWhitespace
  330. case '\\':
  331. // l.backup()
  332. // l.input = append(l.input[:l.pos], l.input[l.pos+1:])
  333. l.next()
  334. return lexString
  335. }
  336. return lexString
  337. }
  338. // lex an integer. Once we're on an integer, the only valid characters are
  339. // whitespace, close paren, a period to indicate we want a float, or more
  340. // digits. Everything else is crap.
  341. func lexInt(l *lexer) stateFn {
  342. digits := "0123456789"
  343. l.acceptRun(digits)
  344. r := l.peek()
  345. switch r {
  346. case ' ', '\t', '\n':
  347. l.emit(_INT)
  348. l.next()
  349. return lexWhitespace
  350. case '.':
  351. l.next()
  352. return lexFloat
  353. case ')':
  354. l.emit(_INT)
  355. l.next()
  356. return lexCloseParen
  357. case ';':
  358. l.emit(_INT)
  359. l.next()
  360. return lexComment
  361. }
  362. return l.errorf("unexpected rune in lexInt: %c", r)
  363. }
  364. // once we're in a float, the only valid values are digits, whitespace or close
  365. // paren.
  366. func lexFloat(l *lexer) stateFn {
  367. digits := "0123456789"
  368. l.acceptRun(digits)
  369. l.emit(_FLOAT)
  370. r := l.next()
  371. switch r {
  372. case ' ', '\t', '\n':
  373. return lexWhitespace
  374. case ')':
  375. return lexCloseParen
  376. case ';':
  377. return lexComment
  378. }
  379. return l.errorf("unexpected run in lexFloat: %c", r)
  380. }
  381. func lexSymbol(l *lexer) stateFn {
  382. r := l.peek()
  383. switch r {
  384. case ' ', '\t', '\n':
  385. l.emit(_SYMBOL)
  386. l.next()
  387. return lexWhitespace
  388. case ')':
  389. l.emit(_SYMBOL)
  390. l.next()
  391. return lexCloseParen
  392. case ';':
  393. l.emit(_SYMBOL)
  394. l.next()
  395. return lexComment
  396. default:
  397. l.next()
  398. return lexSymbol
  399. }
  400. }
  401. // lex a close parenthesis
  402. func lexCloseParen(l *lexer) stateFn {
  403. l.emit(_RPAREN)
  404. l.parenDepth--
  405. if l.parenDepth < 0 {
  406. return l.errorf("unexpected close paren")
  407. }
  408. r := l.next()
  409. switch r {
  410. case ' ', '\t', '\n':
  411. return lexWhitespace
  412. case '(':
  413. return lexOpenParen
  414. case ')':
  415. return lexCloseParen
  416. case ';':
  417. return lexComment
  418. }
  419. return l.errorf("unimplemented")
  420. }
  421. // lexes a comment
  422. func lexComment(l *lexer) stateFn {
  423. r := l.next()
  424. switch r {
  425. case '\n', '\r':
  426. return lexWhitespace
  427. }
  428. return lexComment
  429. }
// lex starts a new lexer over input. The state machine runs in its own
// goroutine and delivers tokens on l.tokens; consume them via nextToken.
func lex(input string) *lexer {
	l := &lexer{
		// name: name,
		input:  input,
		tokens: make(chan token),
	}
	go l.run()
	return l
}
// Pair is a classic Lisp cons cell holding two arbitrary values.
type Pair struct {
	car, cdr interface{}
}
  442. func (p *Pair) String() string {
  443. return fmt.Sprintf("(%v %v)", p.car, p.cdr)
  444. }
  445. func Cons(car, cdr interface{}) *Pair {
  446. return &Pair{car, cdr}
  447. }
// parse reads tokens from l, appending parsed forms to p, and returns
// the completed list. '(' recurses into a fresh nested []Any; ')' and
// EOF end the current level. Quote-family tokens are desugared into
// ("quote" <datum>)-style sublists spliced ahead of the rest.
// NOTE(review): Any and Symbol are declared elsewhere in this package;
// Symbol presumably wraps an identifier string — confirm there.
func parse(l *lexer, p []Any) []Any {
	for {
		t := l.nextToken()
		if t.typ == _EOF {
			break
		} else if t.typ == _INVALID {
			// The lexer's error message is in t.val but is discarded here.
			panic("syntax error")
		}
		if t.typ == _LPAREN {
			// Sub-list: parse it into its own slice, then continue at
			// this level via tail call.
			p = append(p, parse(l, []Any{}))
			return parse(l, p)
		} else if t.typ == _RPAREN {
			return p
		} else {
			var v Any
			switch t.typ {
			// For each quote form: parse the remainder of this level,
			// wrap its first form with the marker symbol, and splice
			// the rest back on.
			// NOTE(review): nextExp[0] panics (index out of range) when
			// a quote is immediately followed by ')' or EOF.
			case _UNQUOTESPLICE:
				nextExp := parse(l, []Any{})
				return append(append(p, []Any{Symbol("unquote-splice"), nextExp[0]}), nextExp[1:]...)
			case _UNQUOTE:
				nextExp := parse(l, []Any{})
				return append(append(p, []Any{Symbol("unquote"), nextExp[0]}), nextExp[1:]...)
			case _QUASIQUOTE:
				nextExp := parse(l, []Any{})
				return append(append(p, []Any{Symbol("quasiquote"), nextExp[0]}), nextExp[1:]...)
			case _QUOTE:
				nextExp := parse(l, []Any{})
				return append(append(p, []Any{Symbol("quote"), nextExp[0]}), nextExp[1:]...)
			case _INT:
				// NOTE(review): parse errors are silently dropped; the
				// lexer guarantees digits, so overflow is the only risk.
				v, _ = strconv.ParseInt(t.val, 10, 0)
			case _FLOAT:
				v, _ = strconv.ParseFloat(t.val, 64)
			case _STRING:
				// Strip the surrounding double quotes.
				v = t.val[1 : len(t.val)-1]
			case _BOOL:
				if t.val == "#t" {
					v = true
				} else {
					v = false
				}
			case _SYMBOL:
				v = Symbol(t.val)
			}
			return parse(l, append(p, v))
		}
	}
	return p
}
  496. func args(filename string) {
  497. b, err := ioutil.ReadFile(filename)
  498. if err != nil {
  499. panic(err)
  500. }
  501. l := lex(string(b) + "\n")
  502. p := parse(l, []Any{})
  503. s := NewScope(NewRootScope())
  504. res := s.EvalAll(p)
  505. if len(res) > 0 {
  506. fmt.Println(res[len(res)-1])
  507. }
  508. }
  509. func main() {
  510. if len(os.Args) > 1 {
  511. args(os.Args[1])
  512. return
  513. }
  514. r := bufio.NewReader(os.Stdin)
  515. s := NewScope(NewRootScope())
  516. b, err := ioutil.ReadFile("prelude.gisp")
  517. if err != nil {
  518. panic(err)
  519. }
  520. l := lex(string(b) + "\n")
  521. p := parse(l, []Any{})
  522. s.EvalAll(p)
  523. for {
  524. fmt.Print(">> ")
  525. line, _, err := r.ReadLine()
  526. if err != nil {
  527. fmt.Println("error: ", err)
  528. continue
  529. }
  530. l := lex(string(line) + "\n")
  531. p := parse(l, []Any{})
  532. fmt.Println(p)
  533. res := s.EvalAll(p)
  534. if len(res) > 0 {
  535. fmt.Println(res[len(res)-1])
  536. }
  537. }
  538. }