// gisp.go — a small Lisp lexer, parser, and REPL.
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"os"
	"regexp"
	"strconv"
	"strings"
	"unicode"
	"unicode/utf8"
)
  13. type Any interface{}
  14. type Symbol string
  15. type tokenType int
  16. const (
  17. _INVALID tokenType = iota
  18. _EOF
  19. _INT
  20. _SYMBOL
  21. _LPAREN
  22. _RPAREN
  23. _STRING
  24. _FLOAT
  25. _BOOL
  26. _QUOTE
  27. _QUASIQUOTE
  28. _UNQUOTE
  29. _UNQUOTESPLICE
  30. )
  31. func (t tokenType) String() string {
  32. switch t {
  33. case _INVALID:
  34. return "INVALID TOKEN"
  35. case _EOF:
  36. return "EOF"
  37. case _INT:
  38. return "INT"
  39. case _SYMBOL:
  40. return "SYMBOL"
  41. case _LPAREN:
  42. return "LEFT_PAREN"
  43. case _RPAREN:
  44. return "RIGHT_PAREN"
  45. case _STRING:
  46. return "STRING"
  47. case _FLOAT:
  48. return "FLOAT"
  49. case _BOOL:
  50. return "BOOL"
  51. case _QUOTE:
  52. return "'"
  53. case _QUASIQUOTE:
  54. return "`"
  55. case _UNQUOTE:
  56. return ","
  57. case _UNQUOTESPLICE:
  58. return ",@"
  59. default:
  60. return "WTF!?"
  61. }
  62. }
  63. type token struct {
  64. typ tokenType // The type of this item.
  65. pos Pos // The starting position, in bytes, of this item in the input string.
  66. val string // The value of this item.
  67. }
  68. func (t token) String() string {
  69. return fmt.Sprintf("%s", t.val)
  70. }
  71. const eof = -1
  72. type stateFn func(*lexer) stateFn
  73. type Pos int
// lexer holds the scanning state for one input string. A goroutine
// running the state machine (see run) delivers tokens on the tokens
// channel.
type lexer struct {
	name       string     // name of the input; used only for reporting
	input      string     // the string being scanned
	state      stateFn    // the next lexing function to enter
	pos        Pos        // current byte position in the input
	start      Pos        // start position of the token in flight
	width      Pos        // width of the last rune read, for backup
	lastPos    Pos        // position of the most recent token from nextToken
	tokens     chan token // channel of scanned tokens
	parenDepth int        // nesting depth of ( ) expressions
}
  85. func (l *lexer) run() {
  86. for l.state = lexWhitespace; l.state != nil; {
  87. l.state = l.state(l)
  88. }
  89. }
  90. func (l *lexer) next() rune {
  91. if int(l.pos) >= len(l.input) {
  92. l.width = 0
  93. return eof
  94. }
  95. r, w := utf8.DecodeRuneInString(l.input[l.pos:])
  96. l.width = Pos(w)
  97. l.pos += l.width
  98. return r
  99. }
  100. // peek returns but does not consume the next rune in the input.
  101. func (l *lexer) peek() rune {
  102. r := l.next()
  103. l.backup()
  104. return r
  105. }
// backup steps back one rune by rewinding the width recorded by the
// most recent next. It may only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
}
  110. func (l *lexer) emit(t tokenType) {
  111. l.tokens <- token{t, l.start, l.input[l.start:l.pos]}
  112. l.start = l.pos
  113. }
// ignore discards the input scanned since the last token boundary.
func (l *lexer) ignore() {
	l.start = l.pos
}
  117. // accept consumes the next rune if it's from the valid set.
  118. func (l *lexer) accept(valid string) bool {
  119. if strings.IndexRune(valid, l.next()) >= 0 {
  120. return true
  121. }
  122. l.backup()
  123. return false
  124. }
  125. // acceptRun consumes a run of runes from the valid set.
  126. func (l *lexer) acceptRun(valid string) {
  127. for strings.IndexRune(valid, l.next()) >= 0 {
  128. }
  129. l.backup()
  130. }
// lineNumber reports the 1-based line of the most recently returned
// token, computed by counting newlines up to lastPos.
func (l *lexer) lineNumber() int {
	return 1 + strings.Count(l.input[:l.lastPos], "\n")
}
// errorf emits an _INVALID token whose value is the formatted error
// message, then returns nil to terminate the state machine.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
	l.tokens <- token{_INVALID, l.start, fmt.Sprintf(format, args...)}
	return nil
}
  138. func (l *lexer) nextToken() token {
  139. token := <-l.tokens
  140. l.lastPos = token.pos
  141. return token
  142. }
  143. // lexes an open parenthesis
  144. func lexOpenParen(l *lexer) stateFn {
  145. l.emit(_LPAREN)
  146. l.parenDepth++
  147. r := l.next()
  148. switch r {
  149. case ' ', '\t', '\n', '\r':
  150. return lexWhitespace
  151. case '\'':
  152. return lexQuote
  153. case '`':
  154. return lexQuasiquote
  155. case ',':
  156. return lexUnquote
  157. case '(':
  158. return lexOpenParen
  159. case ')':
  160. return lexCloseParen
  161. case ';':
  162. return lexComment
  163. case '#':
  164. return lexBool
  165. }
  166. if unicode.IsDigit(r) {
  167. return lexInt
  168. }
  169. return lexSymbol
  170. }
  171. func lexBool(l *lexer) stateFn {
  172. l.accept("tf")
  173. l.emit(_BOOL)
  174. r := l.next()
  175. switch r {
  176. case ' ', '\t', '\n':
  177. return lexWhitespace
  178. case ')':
  179. return lexCloseParen
  180. case ';':
  181. return lexComment
  182. }
  183. return l.errorf("unexpected tokens")
  184. }
  185. func lexQuote(l *lexer) stateFn {
  186. l.acceptRun(" ")
  187. l.ignore()
  188. l.emit(_QUOTE)
  189. r := l.next()
  190. switch r {
  191. case '"':
  192. return lexString
  193. case '(':
  194. return lexOpenParen
  195. case ')':
  196. return lexCloseParen
  197. case '#':
  198. return lexBool
  199. case '\'':
  200. return lexQuote
  201. case '`':
  202. return lexQuasiquote
  203. case ',':
  204. return lexUnquote
  205. }
  206. if unicode.IsDigit(r) {
  207. return lexInt
  208. }
  209. return lexSymbol
  210. }
  211. func lexQuasiquote(l *lexer) stateFn {
  212. l.acceptRun(" ")
  213. l.ignore()
  214. l.emit(_QUASIQUOTE)
  215. r := l.next()
  216. switch r {
  217. case '"':
  218. return lexString
  219. case '(':
  220. return lexOpenParen
  221. case ')':
  222. return lexCloseParen
  223. case '#':
  224. return lexBool
  225. case '\'':
  226. return lexQuote
  227. case '`':
  228. return lexQuasiquote
  229. case ',':
  230. return lexUnquote
  231. }
  232. if unicode.IsDigit(r) {
  233. return lexInt
  234. }
  235. return lexSymbol
  236. }
  237. func lexUnquote(l *lexer) stateFn {
  238. if l.peek() == '@' {
  239. return lexUnquoteSplice
  240. }
  241. l.acceptRun(" ")
  242. l.ignore()
  243. l.emit(_UNQUOTE)
  244. r := l.next()
  245. switch r {
  246. case '"':
  247. return lexString
  248. case '(':
  249. return lexOpenParen
  250. case ')':
  251. return lexCloseParen
  252. case '#':
  253. return lexBool
  254. case '\'':
  255. return lexQuote
  256. case '`':
  257. return lexQuasiquote
  258. case ',':
  259. return lexUnquote
  260. }
  261. if unicode.IsDigit(r) {
  262. return lexInt
  263. }
  264. return lexSymbol
  265. }
  266. func lexUnquoteSplice(l *lexer) stateFn {
  267. r := l.next()
  268. l.acceptRun(" ")
  269. l.ignore()
  270. l.emit(_UNQUOTESPLICE)
  271. r = l.next()
  272. switch r {
  273. case '"':
  274. return lexString
  275. case '(':
  276. return lexOpenParen
  277. case ')':
  278. return lexCloseParen
  279. case '#':
  280. return lexBool
  281. case '\'':
  282. return lexQuote
  283. case '`':
  284. return lexQuasiquote
  285. case ',':
  286. return lexUnquote
  287. }
  288. if unicode.IsDigit(r) {
  289. return lexInt
  290. }
  291. return lexSymbol
  292. }
  293. func lexWhitespace(l *lexer) stateFn {
  294. l.ignore()
  295. r := l.next()
  296. switch r {
  297. case ' ', '\t', '\n':
  298. return lexWhitespace
  299. case '\'':
  300. return lexQuote
  301. case '`':
  302. return lexQuasiquote
  303. case ',':
  304. return lexUnquote
  305. case '"':
  306. return lexString
  307. case '(':
  308. return lexOpenParen
  309. case ')':
  310. return lexCloseParen
  311. case ';':
  312. return lexComment
  313. case '#':
  314. return lexBool
  315. case eof:
  316. if l.parenDepth > 0 {
  317. return l.errorf("unclosed paren")
  318. }
  319. l.emit(_EOF)
  320. return nil
  321. }
  322. if unicode.IsDigit(r) {
  323. return lexInt
  324. }
  325. return lexSymbol
  326. }
  327. func lexString(l *lexer) stateFn {
  328. r := l.next()
  329. switch r {
  330. case '"':
  331. l.emit(_STRING)
  332. return lexWhitespace
  333. case '\\':
  334. // l.backup()
  335. // l.input = append(l.input[:l.pos], l.input[l.pos+1:])
  336. l.next()
  337. return lexString
  338. }
  339. return lexString
  340. }
  341. // lex an integer. Once we're on an integer, the only valid characters are
  342. // whitespace, close paren, a period to indicate we want a float, or more
  343. // digits. Everything else is crap.
  344. func lexInt(l *lexer) stateFn {
  345. digits := "0123456789"
  346. l.acceptRun(digits)
  347. r := l.peek()
  348. switch r {
  349. case ' ', '\t', '\n':
  350. l.emit(_INT)
  351. l.next()
  352. return lexWhitespace
  353. case '.':
  354. l.next()
  355. return lexFloat
  356. case ')':
  357. l.emit(_INT)
  358. l.next()
  359. return lexCloseParen
  360. case ';':
  361. l.emit(_INT)
  362. l.next()
  363. return lexComment
  364. }
  365. return l.errorf("unexpected rune in lexInt: %c", r)
  366. }
  367. // once we're in a float, the only valid values are digits, whitespace or close
  368. // paren.
  369. func lexFloat(l *lexer) stateFn {
  370. digits := "0123456789"
  371. l.acceptRun(digits)
  372. l.emit(_FLOAT)
  373. r := l.next()
  374. switch r {
  375. case ' ', '\t', '\n':
  376. return lexWhitespace
  377. case ')':
  378. return lexCloseParen
  379. case ';':
  380. return lexComment
  381. }
  382. return l.errorf("unexpected run in lexFloat: %c", r)
  383. }
  384. func lexSymbol(l *lexer) stateFn {
  385. r := l.peek()
  386. switch r {
  387. case ' ', '\t', '\n':
  388. l.emit(_SYMBOL)
  389. l.next()
  390. return lexWhitespace
  391. case ')':
  392. l.emit(_SYMBOL)
  393. l.next()
  394. return lexCloseParen
  395. case ';':
  396. l.emit(_SYMBOL)
  397. l.next()
  398. return lexComment
  399. default:
  400. l.next()
  401. return lexSymbol
  402. }
  403. }
  404. // lex a close parenthesis
  405. func lexCloseParen(l *lexer) stateFn {
  406. l.emit(_RPAREN)
  407. l.parenDepth--
  408. if l.parenDepth < 0 {
  409. return l.errorf("unexpected close paren")
  410. }
  411. r := l.next()
  412. switch r {
  413. case ' ', '\t', '\n':
  414. return lexWhitespace
  415. case '(':
  416. return lexOpenParen
  417. case ')':
  418. return lexCloseParen
  419. case ';':
  420. return lexComment
  421. }
  422. return l.errorf("unimplemented")
  423. }
  424. // lexes a comment
  425. func lexComment(l *lexer) stateFn {
  426. r := l.next()
  427. switch r {
  428. case '\n', '\r':
  429. return lexWhitespace
  430. }
  431. return lexComment
  432. }
  433. func lex(input string) *lexer {
  434. l := &lexer{
  435. // name: name,
  436. input: input,
  437. tokens: make(chan token),
  438. }
  439. go l.run()
  440. return l
  441. }
// parse reads tokens from l and appends the parsed values to p,
// returning the completed slice. Lists recurse: on '(' a fresh
// sub-slice is parsed and appended as a single element. Reader macros
// (quote, quasiquote, unquote, unquote-splice) wrap the expression that
// follows them in a (Symbol, expr) pair. Panics on an _INVALID token.
func parse(l *lexer, p []Any) []Any {
	for {
		t := l.nextToken()
		if t.typ == _EOF {
			break
		} else if t.typ == _INVALID {
			panic("syntax error")
		}
		if t.typ == _LPAREN {
			// Parse the sub-list into its own slice, then continue
			// parsing siblings at this level via tail recursion.
			p = append(p, parse(l, []Any{}))
			return parse(l, p)
		} else if t.typ == _RPAREN {
			// End of the current list: hand the finished slice back to
			// the caller's recursion.
			return p
		} else {
			var v Any
			switch t.typ {
			case _UNQUOTESPLICE:
				// Parse the rest of the enclosing list, wrap its first
				// element with the macro symbol, and re-append the rest.
				nextExp := parse(l, []Any{})
				return append(append(p, []Any{Symbol("unquote-splice"), nextExp[0]}), nextExp[1:]...)
			case _UNQUOTE:
				nextExp := parse(l, []Any{})
				return append(append(p, []Any{Symbol("unquote"), nextExp[0]}), nextExp[1:]...)
			case _QUASIQUOTE:
				nextExp := parse(l, []Any{})
				return append(append(p, []Any{Symbol("quasiquote"), nextExp[0]}), nextExp[1:]...)
			case _QUOTE:
				nextExp := parse(l, []Any{})
				return append(append(p, []Any{Symbol("quote"), nextExp[0]}), nextExp[1:]...)
			case _INT:
				// The lexer guarantees digits, so the error is ignored;
				// note the value is stored as int64.
				v, _ = strconv.ParseInt(t.val, 10, 0)
			case _FLOAT:
				v, _ = strconv.ParseFloat(t.val, 64)
			case _STRING:
				// Strip the surrounding double quotes.
				v = t.val[1 : len(t.val)-1]
			case _BOOL:
				if t.val == "#t" {
					v = true
				} else {
					v = false
				}
			case _SYMBOL:
				v = Symbol(t.val)
			}
			return parse(l, append(p, v))
		}
	}
	// Reached via break on _EOF at the top level.
	return p
}
  490. func CamelCase(src string) string {
  491. var camelingRegex = regexp.MustCompile("[0-9A-Za-z]+")
  492. byteSrc := []byte(src)
  493. chunks := camelingRegex.FindAll(byteSrc, -1)
  494. for idx, val := range chunks {
  495. //if idx > 0 { chunks[idx] = bytes.Title(val) }
  496. chunks[idx] = bytes.Title(val)
  497. }
  498. return string(bytes.Join(chunks, nil))
  499. }
  500. func main() {
  501. fmt.Println(CamelCase("this-is-a-clojure-name"))
  502. r := bufio.NewReader(os.Stdin)
  503. for {
  504. fmt.Print(">> ")
  505. line, _, _ := r.ReadLine()
  506. l := lex(string(line) + "\n")
  507. p := parse(l, []Any{})
  508. fmt.Printf("%#v\n", p)
  509. }
  510. }