packer.go 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. package storage
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "io"
  6. "path/filepath"
  7. "unicode/utf8"
  8. )
  9. // ErrDuplicatePath occurs when a tar archive has more than one entry for the
  10. // same file path
  11. var ErrDuplicatePath = errors.New("duplicates of file paths not supported")
  12. // Packer describes the methods to pack Entries to a storage destination
  13. type Packer interface {
  14. // AddEntry packs the Entry and returns its position
  15. AddEntry(e Entry) (int, error)
  16. }
  17. // Unpacker describes the methods to read Entries from a source
  18. type Unpacker interface {
  19. // Next returns the next Entry being unpacked, or error, until io.EOF
  20. Next() (*Entry, error)
  21. }
  22. /* TODO(vbatts) figure out a good model for this
  23. type PackUnpacker interface {
  24. Packer
  25. Unpacker
  26. }
  27. */
  28. type jsonUnpacker struct {
  29. seen seenNames
  30. dec *json.Decoder
  31. }
  32. func (jup *jsonUnpacker) Next() (*Entry, error) {
  33. var e Entry
  34. err := jup.dec.Decode(&e)
  35. if err != nil {
  36. return nil, err
  37. }
  38. // check for dup name
  39. if e.Type == FileType {
  40. cName := filepath.Clean(e.GetName())
  41. if _, ok := jup.seen[cName]; ok {
  42. return nil, ErrDuplicatePath
  43. }
  44. jup.seen[cName] = struct{}{}
  45. }
  46. return &e, err
  47. }
  48. // NewJSONUnpacker provides an Unpacker that reads Entries (SegmentType and
  49. // FileType) as a json document.
  50. //
  51. // Each Entry read are expected to be delimited by new line.
  52. func NewJSONUnpacker(r io.Reader) Unpacker {
  53. return &jsonUnpacker{
  54. dec: json.NewDecoder(r),
  55. seen: seenNames{},
  56. }
  57. }
  58. type jsonPacker struct {
  59. w io.Writer
  60. e *json.Encoder
  61. pos int
  62. seen seenNames
  63. }
  64. type seenNames map[string]struct{}
  65. func (jp *jsonPacker) AddEntry(e Entry) (int, error) {
  66. // if Name is not valid utf8, switch it to raw first.
  67. if e.Name != "" {
  68. if !utf8.ValidString(e.Name) {
  69. e.NameRaw = []byte(e.Name)
  70. e.Name = ""
  71. }
  72. }
  73. // check early for dup name
  74. if e.Type == FileType {
  75. cName := filepath.Clean(e.GetName())
  76. if _, ok := jp.seen[cName]; ok {
  77. return -1, ErrDuplicatePath
  78. }
  79. jp.seen[cName] = struct{}{}
  80. }
  81. e.Position = jp.pos
  82. err := jp.e.Encode(e)
  83. if err != nil {
  84. return -1, err
  85. }
  86. // made it this far, increment now
  87. jp.pos++
  88. return e.Position, nil
  89. }
  90. // NewJSONPacker provides a Packer that writes each Entry (SegmentType and
  91. // FileType) as a json document.
  92. //
  93. // The Entries are delimited by new line.
  94. func NewJSONPacker(w io.Writer) Packer {
  95. return &jsonPacker{
  96. w: w,
  97. e: json.NewEncoder(w),
  98. seen: seenNames{},
  99. }
  100. }
  101. /*
  102. TODO(vbatts) perhaps have a more compact packer/unpacker, maybe using msgpack
  103. (https://github.com/ugorji/go)
  104. Even though, since our jsonUnpacker and jsonPacker just take
  105. io.Reader/io.Writer, then we can get away with passing them a
  106. gzip.Reader/gzip.Writer
  107. */