decode.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. package yaml
  2. import (
  3. "encoding/base64"
  4. "fmt"
  5. "reflect"
  6. "strconv"
  7. "time"
  8. )
  9. const (
  10. documentNode = 1 << iota
  11. mappingNode
  12. sequenceNode
  13. scalarNode
  14. aliasNode
  15. )
  16. type node struct {
  17. kind int
  18. line, column int
  19. tag string
  20. value string
  21. implicit bool
  22. children []*node
  23. anchors map[string]*node
  24. }
  25. // ----------------------------------------------------------------------------
  26. // Parser, produces a node tree out of a libyaml event stream.
  27. type parser struct {
  28. parser yaml_parser_t
  29. event yaml_event_t
  30. doc *node
  31. transform transformString
  32. }
  33. func newParser(b []byte, t transformString) *parser {
  34. p := parser{transform: t}
  35. if !yaml_parser_initialize(&p.parser) {
  36. panic("Failed to initialize YAML emitter")
  37. }
  38. if len(b) == 0 {
  39. b = []byte{'\n'}
  40. }
  41. yaml_parser_set_input_string(&p.parser, b)
  42. p.skip()
  43. if p.event.typ != yaml_STREAM_START_EVENT {
  44. panic("Expected stream start event, got " + strconv.Itoa(int(p.event.typ)))
  45. }
  46. p.skip()
  47. return &p
  48. }
  49. func (p *parser) destroy() {
  50. if p.event.typ != yaml_NO_EVENT {
  51. yaml_event_delete(&p.event)
  52. }
  53. yaml_parser_delete(&p.parser)
  54. }
  55. func (p *parser) skip() {
  56. if p.event.typ != yaml_NO_EVENT {
  57. if p.event.typ == yaml_STREAM_END_EVENT {
  58. fail("Attempted to go past the end of stream. Corrupted value?")
  59. }
  60. yaml_event_delete(&p.event)
  61. }
  62. if !yaml_parser_parse(&p.parser, &p.event) {
  63. p.fail()
  64. }
  65. }
  66. func (p *parser) fail() {
  67. var where string
  68. var line int
  69. if p.parser.problem_mark.line != 0 {
  70. line = p.parser.problem_mark.line
  71. } else if p.parser.context_mark.line != 0 {
  72. line = p.parser.context_mark.line
  73. }
  74. if line != 0 {
  75. where = "line " + strconv.Itoa(line) + ": "
  76. }
  77. var msg string
  78. if len(p.parser.problem) > 0 {
  79. msg = p.parser.problem
  80. } else {
  81. msg = "Unknown problem parsing YAML content"
  82. }
  83. fail(where + msg)
  84. }
  85. func (p *parser) anchor(n *node, anchor []byte) {
  86. if anchor != nil {
  87. p.doc.anchors[string(anchor)] = n
  88. }
  89. }
  90. func (p *parser) parse() *node {
  91. switch p.event.typ {
  92. case yaml_SCALAR_EVENT:
  93. return p.scalar()
  94. case yaml_ALIAS_EVENT:
  95. return p.alias()
  96. case yaml_MAPPING_START_EVENT:
  97. return p.mapping()
  98. case yaml_SEQUENCE_START_EVENT:
  99. return p.sequence()
  100. case yaml_DOCUMENT_START_EVENT:
  101. return p.document()
  102. case yaml_STREAM_END_EVENT:
  103. // Happens when attempting to decode an empty buffer.
  104. return nil
  105. default:
  106. panic("Attempted to parse unknown event: " + strconv.Itoa(int(p.event.typ)))
  107. }
  108. panic("unreachable")
  109. }
  110. func (p *parser) node(kind int) *node {
  111. return &node{
  112. kind: kind,
  113. line: p.event.start_mark.line,
  114. column: p.event.start_mark.column,
  115. }
  116. }
  117. func (p *parser) document() *node {
  118. n := p.node(documentNode)
  119. n.anchors = make(map[string]*node)
  120. p.doc = n
  121. p.skip()
  122. n.children = append(n.children, p.parse())
  123. if p.event.typ != yaml_DOCUMENT_END_EVENT {
  124. panic("Expected end of document event but got " + strconv.Itoa(int(p.event.typ)))
  125. }
  126. p.skip()
  127. return n
  128. }
  129. func (p *parser) alias() *node {
  130. n := p.node(aliasNode)
  131. n.value = string(p.event.anchor)
  132. p.skip()
  133. return n
  134. }
  135. func (p *parser) scalar() *node {
  136. n := p.node(scalarNode)
  137. n.value = string(p.event.value)
  138. n.tag = string(p.event.tag)
  139. n.implicit = p.event.implicit
  140. p.anchor(n, p.event.anchor)
  141. p.skip()
  142. return n
  143. }
  144. func (p *parser) sequence() *node {
  145. n := p.node(sequenceNode)
  146. p.anchor(n, p.event.anchor)
  147. p.skip()
  148. for p.event.typ != yaml_SEQUENCE_END_EVENT {
  149. n.children = append(n.children, p.parse())
  150. }
  151. p.skip()
  152. return n
  153. }
  154. func (p *parser) mapping() *node {
  155. n := p.node(mappingNode)
  156. p.anchor(n, p.event.anchor)
  157. p.skip()
  158. for p.event.typ != yaml_MAPPING_END_EVENT {
  159. key := p.parse()
  160. key.value = p.transform(key.value)
  161. value := p.parse()
  162. n.children = append(n.children, key, value)
  163. }
  164. p.skip()
  165. return n
  166. }
  167. // ----------------------------------------------------------------------------
  168. // Decoder, unmarshals a node into a provided value.
  169. type decoder struct {
  170. doc *node
  171. aliases map[string]bool
  172. }
  173. func newDecoder() *decoder {
  174. d := &decoder{}
  175. d.aliases = make(map[string]bool)
  176. return d
  177. }
  178. // d.setter deals with setters and pointer dereferencing and initialization.
  179. //
  180. // It's a slightly convoluted case to handle properly:
  181. //
  182. // - nil pointers should be initialized, unless being set to nil
  183. // - we don't know at this point yet what's the value to SetYAML() with.
  184. // - we can't separate pointer deref/init and setter checking, because
  185. // a setter may be found while going down a pointer chain.
  186. //
  187. // Thus, here is how it takes care of it:
  188. //
  189. // - out is provided as a pointer, so that it can be replaced.
  190. // - when looking at a non-setter ptr, *out=ptr.Elem(), unless tag=!!null
  191. // - when a setter is found, *out=interface{}, and a set() function is
  192. // returned to call SetYAML() with the value of *out once it's defined.
  193. //
  194. func (d *decoder) setter(tag string, out *reflect.Value, good *bool) (set func()) {
  195. if (*out).Kind() != reflect.Ptr && (*out).CanAddr() {
  196. setter, _ := (*out).Addr().Interface().(Setter)
  197. if setter != nil {
  198. var arg interface{}
  199. *out = reflect.ValueOf(&arg).Elem()
  200. return func() {
  201. *good = setter.SetYAML(shortTag(tag), arg)
  202. }
  203. }
  204. }
  205. again := true
  206. for again {
  207. again = false
  208. setter, _ := (*out).Interface().(Setter)
  209. if tag != yaml_NULL_TAG || setter != nil {
  210. if pv := (*out); pv.Kind() == reflect.Ptr {
  211. if pv.IsNil() {
  212. *out = reflect.New(pv.Type().Elem()).Elem()
  213. pv.Set((*out).Addr())
  214. } else {
  215. *out = pv.Elem()
  216. }
  217. setter, _ = pv.Interface().(Setter)
  218. again = true
  219. }
  220. }
  221. if setter != nil {
  222. var arg interface{}
  223. *out = reflect.ValueOf(&arg).Elem()
  224. return func() {
  225. *good = setter.SetYAML(shortTag(tag), arg)
  226. }
  227. }
  228. }
  229. return nil
  230. }
  231. func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
  232. switch n.kind {
  233. case documentNode:
  234. good = d.document(n, out)
  235. case scalarNode:
  236. good = d.scalar(n, out)
  237. case aliasNode:
  238. good = d.alias(n, out)
  239. case mappingNode:
  240. good = d.mapping(n, out)
  241. case sequenceNode:
  242. good = d.sequence(n, out)
  243. default:
  244. panic("Internal error: unknown node kind: " + strconv.Itoa(n.kind))
  245. }
  246. return
  247. }
  248. func (d *decoder) document(n *node, out reflect.Value) (good bool) {
  249. if len(n.children) == 1 {
  250. d.doc = n
  251. d.unmarshal(n.children[0], out)
  252. return true
  253. }
  254. return false
  255. }
  256. func (d *decoder) alias(n *node, out reflect.Value) (good bool) {
  257. an, ok := d.doc.anchors[n.value]
  258. if !ok {
  259. fail("Unknown anchor '" + n.value + "' referenced")
  260. }
  261. if d.aliases[n.value] {
  262. fail("Anchor '" + n.value + "' value contains itself")
  263. }
  264. d.aliases[n.value] = true
  265. good = d.unmarshal(an, out)
  266. delete(d.aliases, n.value)
  267. return good
  268. }
  269. var zeroValue reflect.Value
  270. func resetMap(out reflect.Value) {
  271. for _, k := range out.MapKeys() {
  272. out.SetMapIndex(k, zeroValue)
  273. }
  274. }
  275. var durationType = reflect.TypeOf(time.Duration(0))
  276. func (d *decoder) scalar(n *node, out reflect.Value) (good bool) {
  277. var tag string
  278. var resolved interface{}
  279. if n.tag == "" && !n.implicit {
  280. tag = yaml_STR_TAG
  281. resolved = n.value
  282. } else {
  283. tag, resolved = resolve(n.tag, n.value)
  284. if tag == yaml_BINARY_TAG {
  285. data, err := base64.StdEncoding.DecodeString(resolved.(string))
  286. if err != nil {
  287. fail("!!binary value contains invalid base64 data")
  288. }
  289. resolved = string(data)
  290. }
  291. }
  292. if set := d.setter(tag, &out, &good); set != nil {
  293. defer set()
  294. }
  295. if resolved == nil {
  296. if out.Kind() == reflect.Map && !out.CanAddr() {
  297. resetMap(out)
  298. } else {
  299. out.Set(reflect.Zero(out.Type()))
  300. }
  301. good = true
  302. return
  303. }
  304. switch out.Kind() {
  305. case reflect.String:
  306. if tag == yaml_BINARY_TAG {
  307. out.SetString(resolved.(string))
  308. good = true
  309. } else if resolved != nil {
  310. out.SetString(n.value)
  311. good = true
  312. }
  313. case reflect.Interface:
  314. if resolved == nil {
  315. out.Set(reflect.Zero(out.Type()))
  316. } else {
  317. out.Set(reflect.ValueOf(resolved))
  318. }
  319. good = true
  320. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  321. switch resolved := resolved.(type) {
  322. case int:
  323. if !out.OverflowInt(int64(resolved)) {
  324. out.SetInt(int64(resolved))
  325. good = true
  326. }
  327. case int64:
  328. if !out.OverflowInt(resolved) {
  329. out.SetInt(resolved)
  330. good = true
  331. }
  332. case float64:
  333. if resolved < 1<<63-1 && !out.OverflowInt(int64(resolved)) {
  334. out.SetInt(int64(resolved))
  335. good = true
  336. }
  337. case string:
  338. if out.Type() == durationType {
  339. d, err := time.ParseDuration(resolved)
  340. if err == nil {
  341. out.SetInt(int64(d))
  342. good = true
  343. }
  344. }
  345. }
  346. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  347. switch resolved := resolved.(type) {
  348. case int:
  349. if resolved >= 0 {
  350. out.SetUint(uint64(resolved))
  351. good = true
  352. }
  353. case int64:
  354. if resolved >= 0 {
  355. out.SetUint(uint64(resolved))
  356. good = true
  357. }
  358. case float64:
  359. if resolved < 1<<64-1 && !out.OverflowUint(uint64(resolved)) {
  360. out.SetUint(uint64(resolved))
  361. good = true
  362. }
  363. }
  364. case reflect.Bool:
  365. switch resolved := resolved.(type) {
  366. case bool:
  367. out.SetBool(resolved)
  368. good = true
  369. }
  370. case reflect.Float32, reflect.Float64:
  371. switch resolved := resolved.(type) {
  372. case int:
  373. out.SetFloat(float64(resolved))
  374. good = true
  375. case int64:
  376. out.SetFloat(float64(resolved))
  377. good = true
  378. case float64:
  379. out.SetFloat(resolved)
  380. good = true
  381. }
  382. case reflect.Ptr:
  383. if out.Type().Elem() == reflect.TypeOf(resolved) {
  384. elem := reflect.New(out.Type().Elem())
  385. elem.Elem().Set(reflect.ValueOf(resolved))
  386. out.Set(elem)
  387. good = true
  388. }
  389. }
  390. return good
  391. }
  392. func settableValueOf(i interface{}) reflect.Value {
  393. v := reflect.ValueOf(i)
  394. sv := reflect.New(v.Type()).Elem()
  395. sv.Set(v)
  396. return sv
  397. }
  398. func (d *decoder) sequence(n *node, out reflect.Value) (good bool) {
  399. if set := d.setter(yaml_SEQ_TAG, &out, &good); set != nil {
  400. defer set()
  401. }
  402. var iface reflect.Value
  403. if out.Kind() == reflect.Interface {
  404. // No type hints. Will have to use a generic sequence.
  405. iface = out
  406. out = settableValueOf(make([]interface{}, 0))
  407. }
  408. if out.Kind() != reflect.Slice {
  409. return false
  410. }
  411. et := out.Type().Elem()
  412. l := len(n.children)
  413. for i := 0; i < l; i++ {
  414. e := reflect.New(et).Elem()
  415. if ok := d.unmarshal(n.children[i], e); ok {
  416. out.Set(reflect.Append(out, e))
  417. }
  418. }
  419. if iface.IsValid() {
  420. iface.Set(out)
  421. }
  422. return true
  423. }
  424. func (d *decoder) mapping(n *node, out reflect.Value) (good bool) {
  425. if set := d.setter(yaml_MAP_TAG, &out, &good); set != nil {
  426. defer set()
  427. }
  428. if out.Kind() == reflect.Struct {
  429. return d.mappingStruct(n, out)
  430. }
  431. if out.Kind() == reflect.Interface {
  432. // No type hints. Will have to use a generic map.
  433. iface := out
  434. out = settableValueOf(make(map[interface{}]interface{}))
  435. iface.Set(out)
  436. }
  437. if out.Kind() != reflect.Map {
  438. return false
  439. }
  440. outt := out.Type()
  441. kt := outt.Key()
  442. et := outt.Elem()
  443. if out.IsNil() {
  444. out.Set(reflect.MakeMap(outt))
  445. }
  446. l := len(n.children)
  447. for i := 0; i < l; i += 2 {
  448. if isMerge(n.children[i]) {
  449. d.merge(n.children[i+1], out)
  450. continue
  451. }
  452. k := reflect.New(kt).Elem()
  453. if d.unmarshal(n.children[i], k) {
  454. kkind := k.Kind()
  455. if kkind == reflect.Interface {
  456. kkind = k.Elem().Kind()
  457. }
  458. if kkind == reflect.Map || kkind == reflect.Slice {
  459. fail(fmt.Sprintf("invalid map key: %#v", k.Interface()))
  460. }
  461. e := reflect.New(et).Elem()
  462. if d.unmarshal(n.children[i+1], e) {
  463. out.SetMapIndex(k, e)
  464. }
  465. }
  466. }
  467. return true
  468. }
  469. func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) {
  470. sinfo, err := getStructInfo(out.Type())
  471. if err != nil {
  472. panic(err)
  473. }
  474. name := settableValueOf("")
  475. l := len(n.children)
  476. for i := 0; i < l; i += 2 {
  477. ni := n.children[i]
  478. if isMerge(ni) {
  479. d.merge(n.children[i+1], out)
  480. continue
  481. }
  482. if !d.unmarshal(ni, name) {
  483. continue
  484. }
  485. if info, ok := sinfo.FieldsMap[name.String()]; ok {
  486. var field reflect.Value
  487. if info.Inline == nil {
  488. field = out.Field(info.Num)
  489. } else {
  490. field = out.FieldByIndex(info.Inline)
  491. }
  492. d.unmarshal(n.children[i+1], field)
  493. }
  494. }
  495. return true
  496. }
  497. func (d *decoder) merge(n *node, out reflect.Value) {
  498. const wantMap = "map merge requires map or sequence of maps as the value"
  499. switch n.kind {
  500. case mappingNode:
  501. d.unmarshal(n, out)
  502. case aliasNode:
  503. an, ok := d.doc.anchors[n.value]
  504. if ok && an.kind != mappingNode {
  505. fail(wantMap)
  506. }
  507. d.unmarshal(n, out)
  508. case sequenceNode:
  509. // Step backwards as earlier nodes take precedence.
  510. for i := len(n.children) - 1; i >= 0; i-- {
  511. ni := n.children[i]
  512. if ni.kind == aliasNode {
  513. an, ok := d.doc.anchors[ni.value]
  514. if ok && an.kind != mappingNode {
  515. fail(wantMap)
  516. }
  517. } else if ni.kind != mappingNode {
  518. fail(wantMap)
  519. }
  520. d.unmarshal(ni, out)
  521. }
  522. default:
  523. fail(wantMap)
  524. }
  525. }
  526. func isMerge(n *node) bool {
  527. return n.kind == scalarNode && n.value == "<<" && (n.implicit == true || n.tag == yaml_MERGE_TAG)
  528. }