disassemble.go

package asm

import (
	"io"
	"io/ioutil"

	"github.com/vbatts/tar-split/archive/tar"
	"github.com/vbatts/tar-split/tar/storage"
)
// NewInputTarStream wraps the Reader stream of a tar archive and provides a
// Reader stream of the same.
//
// In the middle it will pack the segments and file metadata to storage.Packer
// `p`.
//
// The storage.FilePutter is where the payloads of files in the stream are
// stashed. If this stashing is not needed, you can provide a nil
// storage.FilePutter. Since the checksumming is still needed, a default
// NewDiscardFilePutter will be used internally.
func NewInputTarStream(r io.Reader, p storage.Packer, fp storage.FilePutter) (io.Reader, error) {
	// What to do here... folks will want their own access to the Reader that is
	// their tar archive stream, but we'll need that same stream to use our
	// forked 'archive/tar'.
	// Perhaps do an io.TeeReader that hands back an io.Reader for them to read
	// from, and we'll MITM the stream to store metadata.
	// We'll need a storage.FilePutter too ...

	// Another concern is whether to do any storage.FilePutter operations, such that
	// we don't extract any amount of the archive. But then again, we're not making
	// files/directories, hardlinks, etc. Just writing the io to the storage.FilePutter.
	// Perhaps we have a DiscardFilePutter that is a bit bucket.

	// We'll return the pipe reader, since TeeReader does not buffer and will
	// only read as much as is Read from outputRdr. Since tar archives have padding
	// on the end, we want to be the one reading the padding, even if the user's
	// `archive/tar` doesn't care.
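	// As an aside, the tee-into-pipe shape used below can be reduced to the
	// following sketch (mirror and index are illustrative names, not part of
	// this package). The goroutine's reads of src only make progress as the
	// returned reader is drained, because each pipe write blocks until the
	// other end is read:
	//
	//	func mirror(src io.Reader, index func(io.Reader)) io.Reader {
	//		pr, pw := io.Pipe()
	//		go func() {
	//			index(io.TeeReader(src, pw)) // reading the tee also writes to pw
	//			pw.Close()
	//		}()
	//		return pr // the caller reads back the identical byte stream
	//	}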
	pR, pW := io.Pipe()
	outputRdr := io.TeeReader(r, pW)

	// we need a putter that will generate the crc64 sums of file payloads
	if fp == nil {
		fp = storage.NewDiscardFilePutter()
	}

	go func() {
		tr := tar.NewReader(outputRdr)
		tr.RawAccounting = true
		for {
			hdr, err := tr.Next()
			if err != nil {
				if err != io.EOF {
					pW.CloseWithError(err)
					return
				}
				// even when an EOF is reached, there are often 1024 null bytes on
				// the end of an archive. Collect them too.
				if b := tr.RawBytes(); len(b) > 0 {
					_, err := p.AddEntry(storage.Entry{
						Type:    storage.SegmentType,
						Payload: b,
					})
					if err != nil {
						pW.CloseWithError(err)
						return
					}
				}
				break // not return. We need the end of the reader.
			}
			if hdr == nil {
				break // not return. We need the end of the reader.
			}

			if b := tr.RawBytes(); len(b) > 0 {
				_, err := p.AddEntry(storage.Entry{
					Type:    storage.SegmentType,
					Payload: b,
				})
				if err != nil {
					pW.CloseWithError(err)
					return
				}
			}

			var csum []byte
			if hdr.Size > 0 {
				var err error
				_, csum, err = fp.Put(hdr.Name, tr)
				if err != nil {
					pW.CloseWithError(err)
					return
				}
			}

			entry := storage.Entry{
				Type:    storage.FileType,
				Size:    hdr.Size,
				Payload: csum,
			}
			// For proper marshalling of non-utf8 characters
			entry.SetName(hdr.Name)

			// File entries are added regardless of size
			_, err = p.AddEntry(entry)
			if err != nil {
				pW.CloseWithError(err)
				return
			}

			if b := tr.RawBytes(); len(b) > 0 {
				_, err = p.AddEntry(storage.Entry{
					Type:    storage.SegmentType,
					Payload: b,
				})
				if err != nil {
					pW.CloseWithError(err)
					return
				}
			}
		}

		// it is allowable, and not uncommon, that there is further padding on the
		// end of an archive, apart from the expected 1024 null bytes.
		remainder, err := ioutil.ReadAll(outputRdr)
		if err != nil && err != io.EOF {
			pW.CloseWithError(err)
			return
		}
		_, err = p.AddEntry(storage.Entry{
			Type:    storage.SegmentType,
			Payload: remainder,
		})
		if err != nil {
			pW.CloseWithError(err)
			return
		}
		pW.Close()
	}()

	return pR, nil
}
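
// Usage sketch: one way to wire NewInputTarStream so that the tar metadata is
// packed to a JSON stream while the archive itself is read through unchanged.
// This assumes storage.NewJSONPacker is the JSON-backed Packer constructor in
// tar/storage; the file names here are placeholders only.
//
//	package main
//
//	import (
//		"io"
//		"io/ioutil"
//		"os"
//
//		"github.com/vbatts/tar-split/tar/asm"
//		"github.com/vbatts/tar-split/tar/storage"
//	)
//
//	func main() {
//		tarFile, err := os.Open("archive.tar")
//		if err != nil {
//			panic(err)
//		}
//		defer tarFile.Close()
//
//		metadata, err := os.Create("tar-data.jsonl")
//		if err != nil {
//			panic(err)
//		}
//		defer metadata.Close()
//
//		// A nil FilePutter means payloads are checksummed and discarded.
//		rdr, err := asm.NewInputTarStream(tarFile, storage.NewJSONPacker(metadata), nil)
//		if err != nil {
//			panic(err)
//		}
//
//		// The packing goroutine only advances as this reader is drained, so
//		// the caller must consume rdr to completion.
//		if _, err := io.Copy(ioutil.Discard, rdr); err != nil {
//			panic(err)
//		}
//	}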