reader.go 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package tar
  5. // TODO(dsymonds):
  6. // - pax extensions
  7. import (
  8. "bytes"
  9. "errors"
  10. "io"
  11. "io/ioutil"
  12. "os"
  13. "strconv"
  14. "strings"
  15. "time"
  16. )
  // ErrHeader is the error reported when a tar header (or one of its
  // extensions) cannot be parsed.
  var ErrHeader = errors.New("archive/tar: invalid tar header")

  // maxNanoSecondIntSize is the number of decimal digits kept from the
  // fractional part of a PAX timestamp, i.e. nanosecond resolution.
  const maxNanoSecondIntSize = 9
  21. // A Reader provides sequential access to the contents of a tar archive.
  22. // A tar archive consists of a sequence of files.
  23. // The Next method advances to the next file in the archive (including the first),
  24. // and then it can be treated as an io.Reader to access the file's data.
  25. type Reader struct {
  26. r io.Reader
  27. err error
  28. pad int64 // amount of padding (ignored) after current file entry
  29. curr numBytesReader // reader for current file entry
  30. hdrBuff [blockSize]byte // buffer to use in readHeader
  31. RawAccounting bool // Whether to enable the access needed to reassemble the tar from raw bytes. Some performance/memory hit for this.
  32. rawBytes *bytes.Buffer // last raw bits
  33. }
  34. // RawBytes accesses the raw bytes of the archive, apart from the file payload itself.
  35. // This includes the header and padding.
  36. //
  37. // This call resets the current rawbytes buffer
  38. //
  39. // Only when RawAccounting is enabled, otherwise this returns nil
  40. func (tr *Reader) RawBytes() []byte {
  41. if !tr.RawAccounting {
  42. return nil
  43. }
  44. if tr.rawBytes == nil {
  45. tr.rawBytes = bytes.NewBuffer(nil)
  46. }
  47. // if we've read them, then flush them.
  48. defer tr.rawBytes.Reset()
  49. return tr.rawBytes.Bytes()
  50. }
  // numBytesReader is an io.Reader that can additionally report, through
  // numBytes, how many bytes of the underlying encoded data are still unread.
  type numBytesReader interface {
  	io.Reader

  	// numBytes returns the number of bytes remaining in the encoded data.
  	numBytes() int64
  }
  // regFileReader is the numBytesReader used to read the data of a regular
  // file entry from a tar archive.
  type regFileReader struct {
  	// r is the underlying reader.
  	r io.Reader
  	// nb is the number of unread bytes for the current file entry.
  	nb int64
  }
  62. // A sparseFileReader is a numBytesReader for reading sparse file data from a tar archive.
  63. type sparseFileReader struct {
  64. rfr *regFileReader // reads the sparse-encoded file data
  65. sp []sparseEntry // the sparse map for the file
  66. pos int64 // keeps track of file position
  67. tot int64 // total size of the file
  68. }
  // PAX extended-header keywords that describe GNU sparse files.
  const (
  	paxGNUSparseNumBlocks = "GNU.sparse.numblocks"
  	paxGNUSparseOffset    = "GNU.sparse.offset"
  	paxGNUSparseNumBytes  = "GNU.sparse.numbytes"
  	paxGNUSparseMap       = "GNU.sparse.map"
  	paxGNUSparseName      = "GNU.sparse.name"
  	paxGNUSparseMajor     = "GNU.sparse.major"
  	paxGNUSparseMinor     = "GNU.sparse.minor"
  	paxGNUSparseSize      = "GNU.sparse.size"
  	paxGNUSparseRealSize  = "GNU.sparse.realsize"
  )
  // Layout of the sparse map in old GNU sparse headers: byte offsets within a
  // header block, entry counts per block, and field widths in bytes.
  const (
  	// Main tar header: where the sparse entries start, where the
  	// "is extended" flag lives, and how many entries it holds.
  	oldGNUSparseMainHeaderOffset           = 386
  	oldGNUSparseMainHeaderIsExtendedOffset = 482
  	oldGNUSparseMainHeaderNumEntries       = 4

  	// Extension header: position of its "is extended" flag and its
  	// entry count.
  	oldGNUSparseExtendedHeaderIsExtendedOffset = 504
  	oldGNUSparseExtendedHeaderNumEntries       = 21

  	// Width of the offset and numBytes fields of a single entry.
  	oldGNUSparseOffsetSize   = 12
  	oldGNUSparseNumBytesSize = 12
  )
  91. // NewReader creates a new Reader reading from r.
  92. func NewReader(r io.Reader) *Reader { return &Reader{r: r} }
  93. // Next advances to the next entry in the tar archive.
  94. //
  95. // io.EOF is returned at the end of the input.
  96. func (tr *Reader) Next() (*Header, error) {
  97. var hdr *Header
  98. if tr.RawAccounting {
  99. if tr.rawBytes == nil {
  100. tr.rawBytes = bytes.NewBuffer(nil)
  101. } else {
  102. tr.rawBytes.Reset()
  103. }
  104. }
  105. if tr.err == nil {
  106. tr.skipUnread()
  107. }
  108. if tr.err != nil {
  109. return hdr, tr.err
  110. }
  111. hdr = tr.readHeader()
  112. if hdr == nil {
  113. return hdr, tr.err
  114. }
  115. // Check for PAX/GNU header.
  116. switch hdr.Typeflag {
  117. case TypeXHeader:
  118. // PAX extended header
  119. headers, err := parsePAX(tr)
  120. if err != nil {
  121. return nil, err
  122. }
  123. // We actually read the whole file,
  124. // but this skips alignment padding
  125. tr.skipUnread()
  126. if tr.err != nil {
  127. return nil, tr.err
  128. }
  129. hdr = tr.readHeader()
  130. if hdr == nil {
  131. return nil, tr.err
  132. }
  133. mergePAX(hdr, headers)
  134. // Check for a PAX format sparse file
  135. sp, err := tr.checkForGNUSparsePAXHeaders(hdr, headers)
  136. if err != nil {
  137. tr.err = err
  138. return nil, err
  139. }
  140. if sp != nil {
  141. // Current file is a PAX format GNU sparse file.
  142. // Set the current file reader to a sparse file reader.
  143. tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
  144. }
  145. return hdr, nil
  146. case TypeGNULongName:
  147. // We have a GNU long name header. Its contents are the real file name.
  148. realname, err := ioutil.ReadAll(tr)
  149. if err != nil {
  150. return nil, err
  151. }
  152. var buf []byte
  153. if tr.RawAccounting {
  154. if _, err = tr.rawBytes.Write(realname); err != nil {
  155. return nil, err
  156. }
  157. buf = make([]byte, tr.rawBytes.Len())
  158. copy(buf[:], tr.RawBytes())
  159. }
  160. hdr, err := tr.Next()
  161. // since the above call to Next() resets the buffer, we need to throw the bytes over
  162. if tr.RawAccounting {
  163. buf = append(buf, tr.RawBytes()...)
  164. if _, err = tr.rawBytes.Write(buf); err != nil {
  165. return nil, err
  166. }
  167. }
  168. hdr.Name = cString(realname)
  169. return hdr, err
  170. case TypeGNULongLink:
  171. // We have a GNU long link header.
  172. realname, err := ioutil.ReadAll(tr)
  173. if err != nil {
  174. return nil, err
  175. }
  176. var buf []byte
  177. if tr.RawAccounting {
  178. if _, err = tr.rawBytes.Write(realname); err != nil {
  179. return nil, err
  180. }
  181. buf = make([]byte, tr.rawBytes.Len())
  182. copy(buf[:], tr.RawBytes())
  183. }
  184. hdr, err := tr.Next()
  185. // since the above call to Next() resets the buffer, we need to throw the bytes over
  186. if tr.RawAccounting {
  187. buf = append(buf, tr.RawBytes()...)
  188. if _, err = tr.rawBytes.Write(buf); err != nil {
  189. return nil, err
  190. }
  191. }
  192. hdr.Linkname = cString(realname)
  193. return hdr, err
  194. }
  195. return hdr, tr.err
  196. }
  197. // checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then
  198. // this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to
  199. // be treated as a regular file.
  200. func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) {
  201. var sparseFormat string
  202. // Check for sparse format indicators
  203. major, majorOk := headers[paxGNUSparseMajor]
  204. minor, minorOk := headers[paxGNUSparseMinor]
  205. sparseName, sparseNameOk := headers[paxGNUSparseName]
  206. _, sparseMapOk := headers[paxGNUSparseMap]
  207. sparseSize, sparseSizeOk := headers[paxGNUSparseSize]
  208. sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize]
  209. // Identify which, if any, sparse format applies from which PAX headers are set
  210. if majorOk && minorOk {
  211. sparseFormat = major + "." + minor
  212. } else if sparseNameOk && sparseMapOk {
  213. sparseFormat = "0.1"
  214. } else if sparseSizeOk {
  215. sparseFormat = "0.0"
  216. } else {
  217. // Not a PAX format GNU sparse file.
  218. return nil, nil
  219. }
  220. // Check for unknown sparse format
  221. if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" {
  222. return nil, nil
  223. }
  224. // Update hdr from GNU sparse PAX headers
  225. if sparseNameOk {
  226. hdr.Name = sparseName
  227. }
  228. if sparseSizeOk {
  229. realSize, err := strconv.ParseInt(sparseSize, 10, 0)
  230. if err != nil {
  231. return nil, ErrHeader
  232. }
  233. hdr.Size = realSize
  234. } else if sparseRealSizeOk {
  235. realSize, err := strconv.ParseInt(sparseRealSize, 10, 0)
  236. if err != nil {
  237. return nil, ErrHeader
  238. }
  239. hdr.Size = realSize
  240. }
  241. // Set up the sparse map, according to the particular sparse format in use
  242. var sp []sparseEntry
  243. var err error
  244. switch sparseFormat {
  245. case "0.0", "0.1":
  246. sp, err = readGNUSparseMap0x1(headers)
  247. case "1.0":
  248. sp, err = readGNUSparseMap1x0(tr.curr)
  249. }
  250. return sp, err
  251. }
  252. // mergePAX merges well known headers according to PAX standard.
  253. // In general headers with the same name as those found
  254. // in the header struct overwrite those found in the header
  255. // struct with higher precision or longer values. Esp. useful
  256. // for name and linkname fields.
  257. func mergePAX(hdr *Header, headers map[string]string) error {
  258. for k, v := range headers {
  259. switch k {
  260. case paxPath:
  261. hdr.Name = v
  262. case paxLinkpath:
  263. hdr.Linkname = v
  264. case paxGname:
  265. hdr.Gname = v
  266. case paxUname:
  267. hdr.Uname = v
  268. case paxUid:
  269. uid, err := strconv.ParseInt(v, 10, 0)
  270. if err != nil {
  271. return err
  272. }
  273. hdr.Uid = int(uid)
  274. case paxGid:
  275. gid, err := strconv.ParseInt(v, 10, 0)
  276. if err != nil {
  277. return err
  278. }
  279. hdr.Gid = int(gid)
  280. case paxAtime:
  281. t, err := parsePAXTime(v)
  282. if err != nil {
  283. return err
  284. }
  285. hdr.AccessTime = t
  286. case paxMtime:
  287. t, err := parsePAXTime(v)
  288. if err != nil {
  289. return err
  290. }
  291. hdr.ModTime = t
  292. case paxCtime:
  293. t, err := parsePAXTime(v)
  294. if err != nil {
  295. return err
  296. }
  297. hdr.ChangeTime = t
  298. case paxSize:
  299. size, err := strconv.ParseInt(v, 10, 0)
  300. if err != nil {
  301. return err
  302. }
  303. hdr.Size = int64(size)
  304. default:
  305. if strings.HasPrefix(k, paxXattr) {
  306. if hdr.Xattrs == nil {
  307. hdr.Xattrs = make(map[string]string)
  308. }
  309. hdr.Xattrs[k[len(paxXattr):]] = v
  310. }
  311. }
  312. }
  313. return nil
  314. }
  315. // parsePAXTime takes a string of the form %d.%d as described in
  316. // the PAX specification.
  317. func parsePAXTime(t string) (time.Time, error) {
  318. buf := []byte(t)
  319. pos := bytes.IndexByte(buf, '.')
  320. var seconds, nanoseconds int64
  321. var err error
  322. if pos == -1 {
  323. seconds, err = strconv.ParseInt(t, 10, 0)
  324. if err != nil {
  325. return time.Time{}, err
  326. }
  327. } else {
  328. seconds, err = strconv.ParseInt(string(buf[:pos]), 10, 0)
  329. if err != nil {
  330. return time.Time{}, err
  331. }
  332. nano_buf := string(buf[pos+1:])
  333. // Pad as needed before converting to a decimal.
  334. // For example .030 -> .030000000 -> 30000000 nanoseconds
  335. if len(nano_buf) < maxNanoSecondIntSize {
  336. // Right pad
  337. nano_buf += strings.Repeat("0", maxNanoSecondIntSize-len(nano_buf))
  338. } else if len(nano_buf) > maxNanoSecondIntSize {
  339. // Right truncate
  340. nano_buf = nano_buf[:maxNanoSecondIntSize]
  341. }
  342. nanoseconds, err = strconv.ParseInt(string(nano_buf), 10, 0)
  343. if err != nil {
  344. return time.Time{}, err
  345. }
  346. }
  347. ts := time.Unix(seconds, nanoseconds)
  348. return ts, nil
  349. }
  350. // parsePAX parses PAX headers.
  351. // If an extended header (type 'x') is invalid, ErrHeader is returned
  352. func parsePAX(r io.Reader) (map[string]string, error) {
  353. buf, err := ioutil.ReadAll(r)
  354. if err != nil {
  355. return nil, err
  356. }
  357. // leaving this function for io.Reader makes it more testable
  358. if tr, ok := r.(*Reader); ok && tr.RawAccounting {
  359. if _, err = tr.rawBytes.Write(buf); err != nil {
  360. return nil, err
  361. }
  362. }
  363. // For GNU PAX sparse format 0.0 support.
  364. // This function transforms the sparse format 0.0 headers into sparse format 0.1 headers.
  365. var sparseMap bytes.Buffer
  366. headers := make(map[string]string)
  367. // Each record is constructed as
  368. // "%d %s=%s\n", length, keyword, value
  369. for len(buf) > 0 {
  370. // or the header was empty to start with.
  371. var sp int
  372. // The size field ends at the first space.
  373. sp = bytes.IndexByte(buf, ' ')
  374. if sp == -1 {
  375. return nil, ErrHeader
  376. }
  377. // Parse the first token as a decimal integer.
  378. n, err := strconv.ParseInt(string(buf[:sp]), 10, 0)
  379. if err != nil || n < 5 || int64(len(buf)) < n {
  380. return nil, ErrHeader
  381. }
  382. // Extract everything between the decimal and the n -1 on the
  383. // beginning to eat the ' ', -1 on the end to skip the newline.
  384. var record []byte
  385. record, buf = buf[sp+1:n-1], buf[n:]
  386. // The first equals is guaranteed to mark the end of the key.
  387. // Everything else is value.
  388. eq := bytes.IndexByte(record, '=')
  389. if eq == -1 {
  390. return nil, ErrHeader
  391. }
  392. key, value := record[:eq], record[eq+1:]
  393. keyStr := string(key)
  394. if keyStr == paxGNUSparseOffset || keyStr == paxGNUSparseNumBytes {
  395. // GNU sparse format 0.0 special key. Write to sparseMap instead of using the headers map.
  396. sparseMap.Write(value)
  397. sparseMap.Write([]byte{','})
  398. } else {
  399. // Normal key. Set the value in the headers map.
  400. headers[keyStr] = string(value)
  401. }
  402. }
  403. if sparseMap.Len() != 0 {
  404. // Add sparse info to headers, chopping off the extra comma
  405. sparseMap.Truncate(sparseMap.Len() - 1)
  406. headers[paxGNUSparseMap] = sparseMap.String()
  407. }
  408. return headers, nil
  409. }
  // cString interprets b as a NUL-terminated C-style string and returns the
  // bytes before the first NUL. When b contains no NUL byte, all of b is
  // returned as a string.
  func cString(b []byte) string {
  	if end := bytes.IndexByte(b, 0); end >= 0 {
  		return string(b[:end])
  	}
  	return string(b)
  }
  419. func (tr *Reader) octal(b []byte) int64 {
  420. // Check for binary format first.
  421. if len(b) > 0 && b[0]&0x80 != 0 {
  422. var x int64
  423. for i, c := range b {
  424. if i == 0 {
  425. c &= 0x7f // ignore signal bit in first byte
  426. }
  427. x = x<<8 | int64(c)
  428. }
  429. return x
  430. }
  431. // Because unused fields are filled with NULs, we need
  432. // to skip leading NULs. Fields may also be padded with
  433. // spaces or NULs.
  434. // So we remove leading and trailing NULs and spaces to
  435. // be sure.
  436. b = bytes.Trim(b, " \x00")
  437. if len(b) == 0 {
  438. return 0
  439. }
  440. x, err := strconv.ParseUint(cString(b), 8, 64)
  441. if err != nil {
  442. tr.err = err
  443. }
  444. return int64(x)
  445. }
  446. // skipUnread skips any unread bytes in the existing file entry, as well as any alignment padding.
  447. func (tr *Reader) skipUnread() {
  448. nr := tr.numBytes() + tr.pad // number of bytes to skip
  449. tr.curr, tr.pad = nil, 0
  450. if tr.RawAccounting {
  451. _, tr.err = io.CopyN(tr.rawBytes, tr.r, nr)
  452. return
  453. }
  454. if sr, ok := tr.r.(io.Seeker); ok {
  455. if _, err := sr.Seek(nr, os.SEEK_CUR); err == nil {
  456. return
  457. }
  458. }
  459. _, tr.err = io.CopyN(ioutil.Discard, tr.r, nr)
  460. }
  461. func (tr *Reader) verifyChecksum(header []byte) bool {
  462. if tr.err != nil {
  463. return false
  464. }
  465. given := tr.octal(header[148:156])
  466. unsigned, signed := checksum(header)
  467. return given == unsigned || given == signed
  468. }
  469. func (tr *Reader) readHeader() *Header {
  470. header := tr.hdrBuff[:]
  471. copy(header, zeroBlock)
  472. if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
  473. // because it could read some of the block, but reach EOF first
  474. if tr.err == io.EOF && tr.RawAccounting {
  475. if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
  476. return nil
  477. }
  478. }
  479. return nil
  480. }
  481. if tr.RawAccounting {
  482. if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
  483. return nil
  484. }
  485. }
  486. // Two blocks of zero bytes marks the end of the archive.
  487. if bytes.Equal(header, zeroBlock[0:blockSize]) {
  488. if _, tr.err = io.ReadFull(tr.r, header); tr.err != nil {
  489. // because it could read some of the block, but reach EOF first
  490. if tr.err == io.EOF && tr.RawAccounting {
  491. if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
  492. return nil
  493. }
  494. }
  495. return nil
  496. }
  497. if tr.RawAccounting {
  498. if _, tr.err = tr.rawBytes.Write(header); tr.err != nil {
  499. return nil
  500. }
  501. }
  502. if bytes.Equal(header, zeroBlock[0:blockSize]) {
  503. tr.err = io.EOF
  504. } else {
  505. tr.err = ErrHeader // zero block and then non-zero block
  506. }
  507. return nil
  508. }
  509. if !tr.verifyChecksum(header) {
  510. tr.err = ErrHeader
  511. return nil
  512. }
  513. // Unpack
  514. hdr := new(Header)
  515. s := slicer(header)
  516. hdr.Name = cString(s.next(100))
  517. hdr.Mode = tr.octal(s.next(8))
  518. hdr.Uid = int(tr.octal(s.next(8)))
  519. hdr.Gid = int(tr.octal(s.next(8)))
  520. hdr.Size = tr.octal(s.next(12))
  521. if hdr.Size < 0 {
  522. tr.err = ErrHeader
  523. return nil
  524. }
  525. hdr.ModTime = time.Unix(tr.octal(s.next(12)), 0)
  526. s.next(8) // chksum
  527. hdr.Typeflag = s.next(1)[0]
  528. hdr.Linkname = cString(s.next(100))
  529. // The remainder of the header depends on the value of magic.
  530. // The original (v7) version of tar had no explicit magic field,
  531. // so its magic bytes, like the rest of the block, are NULs.
  532. magic := string(s.next(8)) // contains version field as well.
  533. var format string
  534. switch {
  535. case magic[:6] == "ustar\x00": // POSIX tar (1003.1-1988)
  536. if string(header[508:512]) == "tar\x00" {
  537. format = "star"
  538. } else {
  539. format = "posix"
  540. }
  541. case magic == "ustar \x00": // old GNU tar
  542. format = "gnu"
  543. }
  544. switch format {
  545. case "posix", "gnu", "star":
  546. hdr.Uname = cString(s.next(32))
  547. hdr.Gname = cString(s.next(32))
  548. devmajor := s.next(8)
  549. devminor := s.next(8)
  550. if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock {
  551. hdr.Devmajor = tr.octal(devmajor)
  552. hdr.Devminor = tr.octal(devminor)
  553. }
  554. var prefix string
  555. switch format {
  556. case "posix", "gnu":
  557. prefix = cString(s.next(155))
  558. case "star":
  559. prefix = cString(s.next(131))
  560. hdr.AccessTime = time.Unix(tr.octal(s.next(12)), 0)
  561. hdr.ChangeTime = time.Unix(tr.octal(s.next(12)), 0)
  562. }
  563. if len(prefix) > 0 {
  564. hdr.Name = prefix + "/" + hdr.Name
  565. }
  566. }
  567. if tr.err != nil {
  568. tr.err = ErrHeader
  569. return nil
  570. }
  571. // Maximum value of hdr.Size is 64 GB (12 octal digits),
  572. // so there's no risk of int64 overflowing.
  573. nb := int64(hdr.Size)
  574. tr.pad = -nb & (blockSize - 1) // blockSize is a power of two
  575. // Set the current file reader.
  576. tr.curr = &regFileReader{r: tr.r, nb: nb}
  577. // Check for old GNU sparse format entry.
  578. if hdr.Typeflag == TypeGNUSparse {
  579. // Get the real size of the file.
  580. hdr.Size = tr.octal(header[483:495])
  581. // Read the sparse map.
  582. sp := tr.readOldGNUSparseMap(header)
  583. if tr.err != nil {
  584. return nil
  585. }
  586. // Current file is a GNU sparse file. Update the current file reader.
  587. tr.curr = &sparseFileReader{rfr: tr.curr.(*regFileReader), sp: sp, tot: hdr.Size}
  588. }
  589. return hdr
  590. }
  // sparseEntry is one entry of a sparse file's sparse map: it gives the
  // offset and the size, within the sparse file, of a block of data.
  type sparseEntry struct {
  	// offset is where the data block starts in the sparse file.
  	offset int64
  	// numBytes is the size of the data block.
  	numBytes int64
  }
  598. // readOldGNUSparseMap reads the sparse map as stored in the old GNU sparse format.
  599. // The sparse map is stored in the tar header if it's small enough. If it's larger than four entries,
  600. // then one or more extension headers are used to store the rest of the sparse map.
  601. func (tr *Reader) readOldGNUSparseMap(header []byte) []sparseEntry {
  602. isExtended := header[oldGNUSparseMainHeaderIsExtendedOffset] != 0
  603. spCap := oldGNUSparseMainHeaderNumEntries
  604. if isExtended {
  605. spCap += oldGNUSparseExtendedHeaderNumEntries
  606. }
  607. sp := make([]sparseEntry, 0, spCap)
  608. s := slicer(header[oldGNUSparseMainHeaderOffset:])
  609. // Read the four entries from the main tar header
  610. for i := 0; i < oldGNUSparseMainHeaderNumEntries; i++ {
  611. offset := tr.octal(s.next(oldGNUSparseOffsetSize))
  612. numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
  613. if tr.err != nil {
  614. tr.err = ErrHeader
  615. return nil
  616. }
  617. if offset == 0 && numBytes == 0 {
  618. break
  619. }
  620. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  621. }
  622. for isExtended {
  623. // There are more entries. Read an extension header and parse its entries.
  624. sparseHeader := make([]byte, blockSize)
  625. if _, tr.err = io.ReadFull(tr.r, sparseHeader); tr.err != nil {
  626. return nil
  627. }
  628. if tr.RawAccounting {
  629. if _, tr.err = tr.rawBytes.Write(sparseHeader); tr.err != nil {
  630. return nil
  631. }
  632. }
  633. isExtended = sparseHeader[oldGNUSparseExtendedHeaderIsExtendedOffset] != 0
  634. s = slicer(sparseHeader)
  635. for i := 0; i < oldGNUSparseExtendedHeaderNumEntries; i++ {
  636. offset := tr.octal(s.next(oldGNUSparseOffsetSize))
  637. numBytes := tr.octal(s.next(oldGNUSparseNumBytesSize))
  638. if tr.err != nil {
  639. tr.err = ErrHeader
  640. return nil
  641. }
  642. if offset == 0 && numBytes == 0 {
  643. break
  644. }
  645. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  646. }
  647. }
  648. return sp
  649. }
  650. // readGNUSparseMap1x0 reads the sparse map as stored in GNU's PAX sparse format version 1.0.
  651. // The sparse map is stored just before the file data and padded out to the nearest block boundary.
  652. func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) {
  653. buf := make([]byte, 2*blockSize)
  654. sparseHeader := buf[:blockSize]
  655. // readDecimal is a helper function to read a decimal integer from the sparse map
  656. // while making sure to read from the file in blocks of size blockSize
  657. readDecimal := func() (int64, error) {
  658. // Look for newline
  659. nl := bytes.IndexByte(sparseHeader, '\n')
  660. if nl == -1 {
  661. if len(sparseHeader) >= blockSize {
  662. // This is an error
  663. return 0, ErrHeader
  664. }
  665. oldLen := len(sparseHeader)
  666. newLen := oldLen + blockSize
  667. if cap(sparseHeader) < newLen {
  668. // There's more header, but we need to make room for the next block
  669. copy(buf, sparseHeader)
  670. sparseHeader = buf[:newLen]
  671. } else {
  672. // There's more header, and we can just reslice
  673. sparseHeader = sparseHeader[:newLen]
  674. }
  675. // Now that sparseHeader is large enough, read next block
  676. if _, err := io.ReadFull(r, sparseHeader[oldLen:newLen]); err != nil {
  677. return 0, err
  678. }
  679. // leaving this function for io.Reader makes it more testable
  680. if tr, ok := r.(*Reader); ok && tr.RawAccounting {
  681. if _, err := tr.rawBytes.Write(sparseHeader[oldLen:newLen]); err != nil {
  682. return 0, err
  683. }
  684. }
  685. // Look for a newline in the new data
  686. nl = bytes.IndexByte(sparseHeader[oldLen:newLen], '\n')
  687. if nl == -1 {
  688. // This is an error
  689. return 0, ErrHeader
  690. }
  691. nl += oldLen // We want the position from the beginning
  692. }
  693. // Now that we've found a newline, read a number
  694. n, err := strconv.ParseInt(string(sparseHeader[:nl]), 10, 0)
  695. if err != nil {
  696. return 0, ErrHeader
  697. }
  698. // Update sparseHeader to consume this number
  699. sparseHeader = sparseHeader[nl+1:]
  700. return n, nil
  701. }
  702. // Read the first block
  703. if _, err := io.ReadFull(r, sparseHeader); err != nil {
  704. return nil, err
  705. }
  706. // leaving this function for io.Reader makes it more testable
  707. if tr, ok := r.(*Reader); ok && tr.RawAccounting {
  708. if _, err := tr.rawBytes.Write(sparseHeader); err != nil {
  709. return nil, err
  710. }
  711. }
  712. // The first line contains the number of entries
  713. numEntries, err := readDecimal()
  714. if err != nil {
  715. return nil, err
  716. }
  717. // Read all the entries
  718. sp := make([]sparseEntry, 0, numEntries)
  719. for i := int64(0); i < numEntries; i++ {
  720. // Read the offset
  721. offset, err := readDecimal()
  722. if err != nil {
  723. return nil, err
  724. }
  725. // Read numBytes
  726. numBytes, err := readDecimal()
  727. if err != nil {
  728. return nil, err
  729. }
  730. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  731. }
  732. return sp, nil
  733. }
  734. // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format version 0.1.
  735. // The sparse map is stored in the PAX headers.
  736. func readGNUSparseMap0x1(headers map[string]string) ([]sparseEntry, error) {
  737. // Get number of entries
  738. numEntriesStr, ok := headers[paxGNUSparseNumBlocks]
  739. if !ok {
  740. return nil, ErrHeader
  741. }
  742. numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
  743. if err != nil {
  744. return nil, ErrHeader
  745. }
  746. sparseMap := strings.Split(headers[paxGNUSparseMap], ",")
  747. // There should be two numbers in sparseMap for each entry
  748. if int64(len(sparseMap)) != 2*numEntries {
  749. return nil, ErrHeader
  750. }
  751. // Loop through the entries in the sparse map
  752. sp := make([]sparseEntry, 0, numEntries)
  753. for i := int64(0); i < numEntries; i++ {
  754. offset, err := strconv.ParseInt(sparseMap[2*i], 10, 0)
  755. if err != nil {
  756. return nil, ErrHeader
  757. }
  758. numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 0)
  759. if err != nil {
  760. return nil, ErrHeader
  761. }
  762. sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes})
  763. }
  764. return sp, nil
  765. }
  766. // numBytes returns the number of bytes left to read in the current file's entry
  767. // in the tar archive, or 0 if there is no current file.
  768. func (tr *Reader) numBytes() int64 {
  769. if tr.curr == nil {
  770. // No current file, so no bytes
  771. return 0
  772. }
  773. return tr.curr.numBytes()
  774. }
  775. // Read reads from the current entry in the tar archive.
  776. // It returns 0, io.EOF when it reaches the end of that entry,
  777. // until Next is called to advance to the next entry.
  778. func (tr *Reader) Read(b []byte) (n int, err error) {
  779. if tr.curr == nil {
  780. return 0, io.EOF
  781. }
  782. n, err = tr.curr.Read(b)
  783. if err != nil && err != io.EOF {
  784. tr.err = err
  785. }
  786. return
  787. }
  788. func (rfr *regFileReader) Read(b []byte) (n int, err error) {
  789. if rfr.nb == 0 {
  790. // file consumed
  791. return 0, io.EOF
  792. }
  793. if int64(len(b)) > rfr.nb {
  794. b = b[0:rfr.nb]
  795. }
  796. n, err = rfr.r.Read(b)
  797. rfr.nb -= int64(n)
  798. if err == io.EOF && rfr.nb > 0 {
  799. err = io.ErrUnexpectedEOF
  800. }
  801. return
  802. }
  803. // numBytes returns the number of bytes left to read in the file's data in the tar archive.
  804. func (rfr *regFileReader) numBytes() int64 {
  805. return rfr.nb
  806. }
  807. // readHole reads a sparse file hole ending at offset toOffset
  808. func (sfr *sparseFileReader) readHole(b []byte, toOffset int64) int {
  809. n64 := toOffset - sfr.pos
  810. if n64 > int64(len(b)) {
  811. n64 = int64(len(b))
  812. }
  813. n := int(n64)
  814. for i := 0; i < n; i++ {
  815. b[i] = 0
  816. }
  817. sfr.pos += n64
  818. return n
  819. }
  820. // Read reads the sparse file data in expanded form.
  821. func (sfr *sparseFileReader) Read(b []byte) (n int, err error) {
  822. if len(sfr.sp) == 0 {
  823. // No more data fragments to read from.
  824. if sfr.pos < sfr.tot {
  825. // We're in the last hole
  826. n = sfr.readHole(b, sfr.tot)
  827. return
  828. }
  829. // Otherwise, we're at the end of the file
  830. return 0, io.EOF
  831. }
  832. if sfr.tot < sfr.sp[0].offset {
  833. return 0, io.ErrUnexpectedEOF
  834. }
  835. if sfr.pos < sfr.sp[0].offset {
  836. // We're in a hole
  837. n = sfr.readHole(b, sfr.sp[0].offset)
  838. return
  839. }
  840. // We're not in a hole, so we'll read from the next data fragment
  841. posInFragment := sfr.pos - sfr.sp[0].offset
  842. bytesLeft := sfr.sp[0].numBytes - posInFragment
  843. if int64(len(b)) > bytesLeft {
  844. b = b[0:bytesLeft]
  845. }
  846. n, err = sfr.rfr.Read(b)
  847. sfr.pos += int64(n)
  848. if int64(n) == bytesLeft {
  849. // We're done with this fragment
  850. sfr.sp = sfr.sp[1:]
  851. }
  852. if err == io.EOF && sfr.pos < sfr.tot {
  853. // We reached the end of the last fragment's data, but there's a final hole
  854. err = nil
  855. }
  856. return
  857. }
  858. // numBytes returns the number of bytes left to read in the sparse file's
  859. // sparse-encoded data in the tar archive.
  860. func (sfr *sparseFileReader) numBytes() int64 {
  861. return sfr.rfr.nb
  862. }