difflib.go 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759
  1. // Package difflib is a partial port of Python difflib module.
  2. //
  3. // It provides tools to compare sequences of strings and generate textual diffs.
  4. //
  5. // The following class and functions have been ported:
  6. //
  7. // - SequenceMatcher
  8. //
  9. // - unified_diff
  10. //
  11. // - context_diff
  12. //
  13. // Getting unified diffs was the main goal of the port. Keep in mind this code
  14. // is mostly suitable to output text differences in a human friendly way, there
  15. // are no guarantees generated diffs are consumable by patch(1).
  16. package difflib
  17. import (
  18. "bufio"
  19. "bytes"
  20. "fmt"
  21. "io"
  22. "strings"
  23. )
  24. func min(a, b int) int {
  25. if a < b {
  26. return a
  27. }
  28. return b
  29. }
  30. func max(a, b int) int {
  31. if a > b {
  32. return a
  33. }
  34. return b
  35. }
  36. func calculateRatio(matches, length int) float64 {
  37. if length > 0 {
  38. return 2.0 * float64(matches) / float64(length)
  39. }
  40. return 1.0
  41. }
// Match describes a matching subsequence of the two compared
// sequences: a[A:A+Size] is equal to b[B:B+Size].
type Match struct {
	A    int // start of the match in sequence a
	B    int // start of the match in sequence b
	Size int // number of matching elements
}
// OpCode describes one step of transforming sequence a into sequence
// b, covering a[I1:I2] and b[J1:J2]. Tag is one of 'r' (replace),
// 'd' (delete), 'i' (insert) or 'e' (equal); see GetOpCodes.
type OpCode struct {
	Tag byte // 'r', 'd', 'i' or 'e'
	I1  int  // start of the span in a
	I2  int  // end of the span in a
	J1  int  // start of the span in b
	J2  int  // end of the span in b
}
// SequenceMatcher compares sequence of strings. The basic
// algorithm predates, and is a little fancier than, an algorithm
// published in the late 1980's by Ratcliff and Obershelp under the
// hyperbolic name "gestalt pattern matching". The basic idea is to find
// the longest contiguous matching subsequence that contains no "junk"
// elements (R-O doesn't address junk). The same idea is then applied
// recursively to the pieces of the sequences to the left and to the right
// of the matching subsequence. This does not yield minimal edit
// sequences, but does tend to yield matches that "look right" to people.
//
// SequenceMatcher tries to compute a "human-friendly diff" between two
// sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the
// longest *contiguous* & junk-free matching subsequence. That's what
// catches peoples' eyes. The Windows(tm) windiff has another interesting
// notion, pairing up elements that appear uniquely in each sequence.
// That, and the method here, appear to yield more intuitive difference
// reports than does diff. This method appears to be the least vulnerable
// to synching up on blocks of "junk lines", though (like blank lines in
// ordinary text files, or maybe "<P>" lines in HTML files). That may be
// because this is the only method of the 3 that has a *concept* of
// "junk" <wink>.
//
// Timing: Basic R-O is cubic time worst case and quadratic time expected
// case. SequenceMatcher is quadratic time for the worst case and has
// expected-case behavior dependent in a complicated way on how many
// elements the sequences have in common; best case time is linear.
type SequenceMatcher struct {
	a              []string            // first sequence
	b              []string            // second sequence
	b2j            map[string][]int    // line -> indices in b, junk/popular lines removed (built by chainB)
	IsJunk         func(string) bool   // optional predicate marking lines as junk
	autoJunk       bool                // enable the automatic popularity heuristic
	bJunk          map[string]struct{} // lines of b classified as junk by IsJunk
	matchingBlocks []Match             // cached result of GetMatchingBlocks
	fullBCount     map[string]int      // line -> occurrence count in b, lazily built by QuickRatio
	bPopular       map[string]struct{} // lines of b purged by the popularity heuristic
	opCodes        []OpCode            // cached result of GetOpCodes
}
  92. func NewMatcher(a, b []string) *SequenceMatcher {
  93. m := SequenceMatcher{autoJunk: true}
  94. m.SetSeqs(a, b)
  95. return &m
  96. }
  97. func NewMatcherWithJunk(a, b []string, autoJunk bool,
  98. isJunk func(string) bool) *SequenceMatcher {
  99. m := SequenceMatcher{IsJunk: isJunk, autoJunk: autoJunk}
  100. m.SetSeqs(a, b)
  101. return &m
  102. }
// SetSeqs sets the two sequences to be compared; equivalent to calling
// SetSeq1(a) followed by SetSeq2(b).
func (m *SequenceMatcher) SetSeqs(a, b []string) {
	m.SetSeq1(a)
	m.SetSeq2(b)
}
  108. // Set the first sequence to be compared. The second sequence to be compared is
  109. // not changed.
  110. //
  111. // SequenceMatcher computes and caches detailed information about the second
  112. // sequence, so if you want to compare one sequence S against many sequences,
  113. // use .SetSeq2(s) once and call .SetSeq1(x) repeatedly for each of the other
  114. // sequences.
  115. //
  116. // See also SetSeqs() and SetSeq2().
  117. func (m *SequenceMatcher) SetSeq1(a []string) {
  118. if &a == &m.a {
  119. return
  120. }
  121. m.a = a
  122. m.matchingBlocks = nil
  123. m.opCodes = nil
  124. }
  125. // Set the second sequence to be compared. The first sequence to be compared is
  126. // not changed.
  127. func (m *SequenceMatcher) SetSeq2(b []string) {
  128. if &b == &m.b {
  129. return
  130. }
  131. m.b = b
  132. m.matchingBlocks = nil
  133. m.opCodes = nil
  134. m.fullBCount = nil
  135. m.chainB()
  136. }
  137. func (m *SequenceMatcher) chainB() {
  138. // Populate line -> index mapping
  139. b2j := map[string][]int{}
  140. for i, s := range m.b {
  141. indices := b2j[s]
  142. indices = append(indices, i)
  143. b2j[s] = indices
  144. }
  145. // Purge junk elements
  146. m.bJunk = map[string]struct{}{}
  147. if m.IsJunk != nil {
  148. junk := m.bJunk
  149. for s, _ := range b2j {
  150. if m.IsJunk(s) {
  151. junk[s] = struct{}{}
  152. }
  153. }
  154. for s, _ := range junk {
  155. delete(b2j, s)
  156. }
  157. }
  158. // Purge remaining popular elements
  159. popular := map[string]struct{}{}
  160. n := len(m.b)
  161. if m.autoJunk && n >= 200 {
  162. ntest := n/100 + 1
  163. for s, indices := range b2j {
  164. if len(indices) > ntest {
  165. popular[s] = struct{}{}
  166. }
  167. }
  168. for s, _ := range popular {
  169. delete(b2j, s)
  170. }
  171. }
  172. m.bPopular = popular
  173. m.b2j = b2j
  174. }
  175. func (m *SequenceMatcher) isBJunk(s string) bool {
  176. _, ok := m.bJunk[s]
  177. return ok
  178. }
// findLongestMatch finds the longest matching block in a[alo:ahi] and
// b[blo:bhi].
//
// If IsJunk is not defined:
//
// Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
//
//	alo <= i <= i+k <= ahi
//	blo <= j <= j+k <= bhi
//
// and for all (i',j',k') meeting those conditions,
//
//	k >= k'
//	i <= i'
//	and if i == i', j <= j'
//
// In other words, of all maximal matching blocks, return one that
// starts earliest in a, and of all those maximal matching blocks that
// start earliest in a, return the one that starts earliest in b.
//
// If IsJunk is defined, first the longest matching block is
// determined as above, but with the additional restriction that no
// junk element appears in the block. Then that block is extended as
// far as possible by matching (only) junk elements on both sides. So
// the resulting block never matches on junk except as identical junk
// happens to be adjacent to an "interesting" match.
//
// If no blocks match, return (alo, blo, 0).
func (m *SequenceMatcher) findLongestMatch(alo, ahi, blo, bhi int) Match {
	// CAUTION: stripping common prefix or suffix would be incorrect.
	// E.g.,
	//	ab
	//	acab
	// Longest matching block is "ab", but if common prefix is
	// stripped, it's "a" (tied with "b"). UNIX(tm) diff does so
	// strip, so ends up claiming that ab is changed to acab by
	// inserting "ca" in the middle. That's minimal but unintuitive:
	// "it's obvious" that someone inserted "ac" at the front.
	// Windiff ends up at the same place as diff, but by pairing up
	// the unique 'b's and then matching the first two 'a's.
	besti, bestj, bestsize := alo, blo, 0

	// Find the longest junk-free match. During an iteration of the
	// loop, j2len[j] = length of longest junk-free match ending with
	// a[i-1] and b[j].
	j2len := map[int]int{}
	for i := alo; i != ahi; i++ {
		// Look at all instances of a[i] in b; note that because
		// b2j has no junk keys, the loop is skipped if a[i] is junk.
		newj2len := map[int]int{}
		for _, j := range m.b2j[m.a[i]] {
			// a[i] matches b[j]
			if j < blo {
				continue
			}
			if j >= bhi {
				// Indices are stored in increasing order, so all
				// remaining j values are out of range too.
				break
			}
			// Extend the match ending at (i-1, j-1) by one element.
			k := j2len[j-1] + 1
			newj2len[j] = k
			if k > bestsize {
				besti, bestj, bestsize = i-k+1, j-k+1, k
			}
		}
		j2len = newj2len
	}

	// Extend the best by non-junk elements on each end. In particular,
	// "popular" non-junk elements aren't in b2j, which greatly speeds
	// the inner loop above, but also means "the best" match so far
	// doesn't contain any junk *or* popular non-junk elements.
	for besti > alo && bestj > blo && !m.isBJunk(m.b[bestj-1]) &&
		m.a[besti-1] == m.b[bestj-1] {
		besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
	}
	for besti+bestsize < ahi && bestj+bestsize < bhi &&
		!m.isBJunk(m.b[bestj+bestsize]) &&
		m.a[besti+bestsize] == m.b[bestj+bestsize] {
		bestsize += 1
	}

	// Now that we have a wholly interesting match (albeit possibly
	// empty!), we may as well suck up the matching junk on each
	// side of it too. Can't think of a good reason not to, and it
	// saves post-processing the (possibly considerable) expense of
	// figuring out what to do with it. In the case of an empty
	// interesting match, this is clearly the right thing to do,
	// because no other kind of match is possible in the regions.
	for besti > alo && bestj > blo && m.isBJunk(m.b[bestj-1]) &&
		m.a[besti-1] == m.b[bestj-1] {
		besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
	}
	for besti+bestsize < ahi && bestj+bestsize < bhi &&
		m.isBJunk(m.b[bestj+bestsize]) &&
		m.a[besti+bestsize] == m.b[bestj+bestsize] {
		bestsize += 1
	}

	return Match{A: besti, B: bestj, Size: bestsize}
}
// GetMatchingBlocks returns a list of triples describing matching
// subsequences.
//
// Each triple is of the form (i, j, n), and means that
// a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in
// i and in j. It's also guaranteed that if (i, j, n) and (i', j', n') are
// adjacent triples in the list, and the second is not the last triple in the
// list, then i+n != i' or j+n != j'. IOW, adjacent triples never describe
// adjacent equal blocks.
//
// The last triple is a dummy, (len(a), len(b), 0), and is the only
// triple with n==0. The result is cached on the matcher.
func (m *SequenceMatcher) GetMatchingBlocks() []Match {
	if m.matchingBlocks != nil {
		return m.matchingBlocks
	}

	// Recursively find the longest match in a region, then in the
	// regions to its left and to its right, appending matches in
	// left-to-right order.
	var matchBlocks func(alo, ahi, blo, bhi int, matched []Match) []Match
	matchBlocks = func(alo, ahi, blo, bhi int, matched []Match) []Match {
		match := m.findLongestMatch(alo, ahi, blo, bhi)
		i, j, k := match.A, match.B, match.Size
		if match.Size > 0 {
			if alo < i && blo < j {
				matched = matchBlocks(alo, i, blo, j, matched)
			}
			matched = append(matched, match)
			if i+k < ahi && j+k < bhi {
				matched = matchBlocks(i+k, ahi, j+k, bhi, matched)
			}
		}
		return matched
	}
	matched := matchBlocks(0, len(m.a), 0, len(m.b), nil)

	// It's possible that we have adjacent equal blocks in the
	// matching_blocks list now. Collapse each run of adjacent blocks
	// into a single block.
	nonAdjacent := []Match{}
	i1, j1, k1 := 0, 0, 0
	for _, b := range matched {
		// Is this block adjacent to i1, j1, k1?
		i2, j2, k2 := b.A, b.B, b.Size
		if i1+k1 == i2 && j1+k1 == j2 {
			// Yes, so collapse them -- this just increases the length of
			// the first block by the length of the second, and the first
			// block so lengthened remains the block to compare against.
			k1 += k2
		} else {
			// Not adjacent. Remember the first block (k1==0 means it's
			// the dummy we started with), and make the second block the
			// new block to compare against.
			if k1 > 0 {
				nonAdjacent = append(nonAdjacent, Match{i1, j1, k1})
			}
			i1, j1, k1 = i2, j2, k2
		}
	}
	if k1 > 0 {
		nonAdjacent = append(nonAdjacent, Match{i1, j1, k1})
	}

	// Terminating sentinel: the only triple with Size == 0.
	nonAdjacent = append(nonAdjacent, Match{len(m.a), len(m.b), 0})

	m.matchingBlocks = nonAdjacent
	return m.matchingBlocks
}
  331. // Return list of 5-tuples describing how to turn a into b.
  332. //
  333. // Each tuple is of the form (tag, i1, i2, j1, j2). The first tuple
  334. // has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
  335. // tuple preceding it, and likewise for j1 == the previous j2.
  336. //
  337. // The tags are characters, with these meanings:
  338. //
  339. // 'r' (replace): a[i1:i2] should be replaced by b[j1:j2]
  340. //
  341. // 'd' (delete): a[i1:i2] should be deleted, j1==j2 in this case.
  342. //
  343. // 'i' (insert): b[j1:j2] should be inserted at a[i1:i1], i1==i2 in this case.
  344. //
  345. // 'e' (equal): a[i1:i2] == b[j1:j2]
  346. func (m *SequenceMatcher) GetOpCodes() []OpCode {
  347. if m.opCodes != nil {
  348. return m.opCodes
  349. }
  350. i, j := 0, 0
  351. matching := m.GetMatchingBlocks()
  352. opCodes := make([]OpCode, 0, len(matching))
  353. for _, m := range matching {
  354. // invariant: we've pumped out correct diffs to change
  355. // a[:i] into b[:j], and the next matching block is
  356. // a[ai:ai+size] == b[bj:bj+size]. So we need to pump
  357. // out a diff to change a[i:ai] into b[j:bj], pump out
  358. // the matching block, and move (i,j) beyond the match
  359. ai, bj, size := m.A, m.B, m.Size
  360. tag := byte(0)
  361. if i < ai && j < bj {
  362. tag = 'r'
  363. } else if i < ai {
  364. tag = 'd'
  365. } else if j < bj {
  366. tag = 'i'
  367. }
  368. if tag > 0 {
  369. opCodes = append(opCodes, OpCode{tag, i, ai, j, bj})
  370. }
  371. i, j = ai+size, bj+size
  372. // the list of matching blocks is terminated by a
  373. // sentinel with size 0
  374. if size > 0 {
  375. opCodes = append(opCodes, OpCode{'e', ai, i, bj, j})
  376. }
  377. }
  378. m.opCodes = opCodes
  379. return m.opCodes
  380. }
// GetGroupedOpCodes isolates change clusters by eliminating ranges
// with no changes.
//
// It returns a list of groups, each with up to n lines of context
// around the changes; a negative n selects the default of 3. Each
// group is in the same format as returned by GetOpCodes().
func (m *SequenceMatcher) GetGroupedOpCodes(n int) [][]OpCode {
	if n < 0 {
		n = 3 // default amount of context
	}
	codes := m.GetOpCodes()
	if len(codes) == 0 {
		// Two empty sequences: synthesize a single trivial opcode so
		// the loop below still produces well-formed output.
		codes = []OpCode{OpCode{'e', 0, 1, 0, 1}}
	}
	// Fixup leading and trailing groups if they show no changes:
	// trim unchanged runs at the ends down to n lines of context.
	if codes[0].Tag == 'e' {
		c := codes[0]
		i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
		codes[0] = OpCode{c.Tag, max(i1, i2-n), i2, max(j1, j2-n), j2}
	}
	if codes[len(codes)-1].Tag == 'e' {
		c := codes[len(codes)-1]
		i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
		codes[len(codes)-1] = OpCode{c.Tag, i1, min(i2, i1+n), j1, min(j2, j1+n)}
	}
	nn := n + n
	groups := [][]OpCode{}
	group := []OpCode{}
	for _, c := range codes {
		i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
		// End the current group and start a new one whenever
		// there is a large range with no changes.
		if c.Tag == 'e' && i2-i1 > nn {
			// Close the current group with n trailing context lines...
			group = append(group, OpCode{c.Tag, i1, min(i2, i1+n),
				j1, min(j2, j1+n)})
			groups = append(groups, group)
			group = []OpCode{}
			// ...and start the next group with n leading context lines.
			i1, j1 = max(i1, i2-n), max(j1, j2-n)
		}
		group = append(group, OpCode{c.Tag, i1, i2, j1, j2})
	}
	// Keep the final group unless it is nothing but unchanged context.
	if len(group) > 0 && !(len(group) == 1 && group[0].Tag == 'e') {
		groups = append(groups, group)
	}
	return groups
}
  425. // Return a measure of the sequences' similarity (float in [0,1]).
  426. //
  427. // Where T is the total number of elements in both sequences, and
  428. // M is the number of matches, this is 2.0*M / T.
  429. // Note that this is 1 if the sequences are identical, and 0 if
  430. // they have nothing in common.
  431. //
  432. // .Ratio() is expensive to compute if you haven't already computed
  433. // .GetMatchingBlocks() or .GetOpCodes(), in which case you may
  434. // want to try .QuickRatio() or .RealQuickRation() first to get an
  435. // upper bound.
  436. func (m *SequenceMatcher) Ratio() float64 {
  437. matches := 0
  438. for _, m := range m.GetMatchingBlocks() {
  439. matches += m.Size
  440. }
  441. return calculateRatio(matches, len(m.a)+len(m.b))
  442. }
  443. // Return an upper bound on ratio() relatively quickly.
  444. //
  445. // This isn't defined beyond that it is an upper bound on .Ratio(), and
  446. // is faster to compute.
  447. func (m *SequenceMatcher) QuickRatio() float64 {
  448. // viewing a and b as multisets, set matches to the cardinality
  449. // of their intersection; this counts the number of matches
  450. // without regard to order, so is clearly an upper bound
  451. if m.fullBCount == nil {
  452. m.fullBCount = map[string]int{}
  453. for _, s := range m.b {
  454. m.fullBCount[s] = m.fullBCount[s] + 1
  455. }
  456. }
  457. // avail[x] is the number of times x appears in 'b' less the
  458. // number of times we've seen it in 'a' so far ... kinda
  459. avail := map[string]int{}
  460. matches := 0
  461. for _, s := range m.a {
  462. n, ok := avail[s]
  463. if !ok {
  464. n = m.fullBCount[s]
  465. }
  466. avail[s] = n - 1
  467. if n > 0 {
  468. matches += 1
  469. }
  470. }
  471. return calculateRatio(matches, len(m.a)+len(m.b))
  472. }
  473. // Return an upper bound on ratio() very quickly.
  474. //
  475. // This isn't defined beyond that it is an upper bound on .Ratio(), and
  476. // is faster to compute than either .Ratio() or .QuickRatio().
  477. func (m *SequenceMatcher) RealQuickRatio() float64 {
  478. la, lb := len(m.a), len(m.b)
  479. return calculateRatio(min(la, lb), la+lb)
  480. }
  481. // Convert range to the "ed" format
  482. func formatRangeUnified(start, stop int) string {
  483. // Per the diff spec at http://www.unix.org/single_unix_specification/
  484. beginning := start + 1 // lines start numbering with one
  485. length := stop - start
  486. if length == 1 {
  487. return fmt.Sprintf("%d", beginning)
  488. }
  489. if length == 0 {
  490. beginning -= 1 // empty ranges begin at line just before the range
  491. }
  492. return fmt.Sprintf("%d,%d", beginning, length)
  493. }
// UnifiedDiff holds the inputs and formatting parameters used to
// produce a unified diff.
type UnifiedDiff struct {
	A        []string // First sequence lines
	FromFile string   // First file name
	FromDate string   // First file time
	B        []string // Second sequence lines
	ToFile   string   // Second file name
	ToDate   string   // Second file time
	Eol      string   // Headers end of line, defaults to LF
	Context  int      // Number of context lines
}
// WriteUnifiedDiff compares two sequences of lines; generates the
// delta as a unified diff and writes it to writer.
//
// Unified diffs are a compact way of showing line changes and a few
// lines of context. The number of context lines is set by diff.Context
// which defaults to three.
//
// By default, the diff control lines (those with ---, +++, or @@) are
// created with a trailing newline. This is helpful so that inputs
// created from file.readlines() result in diffs that are suitable for
// file.writelines() since both the inputs and outputs have trailing
// newlines.
//
// For inputs that do not have trailing newlines, set the diff.Eol
// argument to "" so that the output will be uniformly newline free.
//
// The unidiff format normally has a header for filenames and modification
// times. Any or all of these may be specified using strings for
// diff.FromFile, diff.ToFile, diff.FromDate, and diff.ToDate.
// The modification times are normally expressed in the ISO 8601 format.
func WriteUnifiedDiff(writer io.Writer, diff UnifiedDiff) error {
	buf := bufio.NewWriter(writer)
	defer buf.Flush()
	// w formats into the buffered writer and returns any write error.
	w := func(format string, args ...interface{}) error {
		_, err := buf.WriteString(fmt.Sprintf(format, args...))
		return err
	}
	if len(diff.Eol) == 0 {
		diff.Eol = "\n"
	}
	started := false
	m := NewMatcher(diff.A, diff.B)
	for _, g := range m.GetGroupedOpCodes(diff.Context) {
		if !started {
			// Emit the "---"/"+++" file header before the first hunk only.
			started = true
			fromDate := ""
			if len(diff.FromDate) > 0 {
				fromDate = "\t" + diff.FromDate
			}
			toDate := ""
			if len(diff.ToDate) > 0 {
				toDate = "\t" + diff.ToDate
			}
			err := w("--- %s%s%s", diff.FromFile, fromDate, diff.Eol)
			if err != nil {
				return err
			}
			err = w("+++ %s%s%s", diff.ToFile, toDate, diff.Eol)
			if err != nil {
				return err
			}
		}
		// Hunk header with the line ranges covered by this group.
		first, last := g[0], g[len(g)-1]
		range1 := formatRangeUnified(first.I1, last.I2)
		range2 := formatRangeUnified(first.J1, last.J2)
		if err := w("@@ -%s +%s @@%s", range1, range2, diff.Eol); err != nil {
			return err
		}
		for _, c := range g {
			i1, i2, j1, j2 := c.I1, c.I2, c.J1, c.J2
			if c.Tag == 'e' {
				// Context lines are taken from the first sequence.
				for _, line := range diff.A[i1:i2] {
					if err := w(" " + line); err != nil {
						return err
					}
				}
				continue
			}
			// Replaced or deleted lines from the first sequence.
			if c.Tag == 'r' || c.Tag == 'd' {
				for _, line := range diff.A[i1:i2] {
					if err := w("-" + line); err != nil {
						return err
					}
				}
			}
			// Replaced or inserted lines from the second sequence.
			if c.Tag == 'r' || c.Tag == 'i' {
				for _, line := range diff.B[j1:j2] {
					if err := w("+" + line); err != nil {
						return err
					}
				}
			}
		}
	}
	return nil
}
  590. // Like WriteUnifiedDiff but returns the diff a string.
  591. func GetUnifiedDiffString(diff UnifiedDiff) (string, error) {
  592. w := &bytes.Buffer{}
  593. err := WriteUnifiedDiff(w, diff)
  594. return string(w.Bytes()), err
  595. }
  596. // Convert range to the "ed" format.
  597. func formatRangeContext(start, stop int) string {
  598. // Per the diff spec at http://www.unix.org/single_unix_specification/
  599. beginning := start + 1 // lines start numbering with one
  600. length := stop - start
  601. if length == 0 {
  602. beginning -= 1 // empty ranges begin at line just before the range
  603. }
  604. if length <= 1 {
  605. return fmt.Sprintf("%d", beginning)
  606. }
  607. return fmt.Sprintf("%d,%d", beginning, beginning+length-1)
  608. }
  609. type ContextDiff UnifiedDiff
// WriteContextDiff compares two sequences of lines; generates the
// delta as a context diff and writes it to writer.
//
// Context diffs are a compact way of showing line changes and a few
// lines of context. The number of context lines is set by diff.Context
// which defaults to three.
//
// By default, the diff control lines (those with *** or ---) are
// created with a trailing newline.
//
// For inputs that do not have trailing newlines, set the diff.Eol
// argument to "" so that the output will be uniformly newline free.
//
// The context diff format normally has a header for filenames and
// modification times. Any or all of these may be specified using
// strings for diff.FromFile, diff.ToFile, diff.FromDate, diff.ToDate.
// The modification times are normally expressed in the ISO 8601 format.
// If not specified, the strings default to blanks.
func WriteContextDiff(writer io.Writer, diff ContextDiff) error {
	buf := bufio.NewWriter(writer)
	defer buf.Flush()
	// w records the first write error in diffErr instead of returning
	// it, which keeps the many call sites below compact; the error is
	// surfaced once at the end.
	var diffErr error
	w := func(format string, args ...interface{}) {
		_, err := buf.WriteString(fmt.Sprintf(format, args...))
		if diffErr == nil && err != nil {
			diffErr = err
		}
	}
	if len(diff.Eol) == 0 {
		diff.Eol = "\n"
	}
	// Per-tag line prefixes used in context diff bodies.
	prefix := map[byte]string{
		'i': "+ ",
		'd': "- ",
		'r': "! ",
		'e': "  ",
	}
	started := false
	m := NewMatcher(diff.A, diff.B)
	for _, g := range m.GetGroupedOpCodes(diff.Context) {
		if !started {
			// Emit the "***"/"---" file header before the first hunk only.
			started = true
			fromDate := ""
			if len(diff.FromDate) > 0 {
				fromDate = "\t" + diff.FromDate
			}
			toDate := ""
			if len(diff.ToDate) > 0 {
				toDate = "\t" + diff.ToDate
			}
			w("*** %s%s%s", diff.FromFile, fromDate, diff.Eol)
			w("--- %s%s%s", diff.ToFile, toDate, diff.Eol)
		}
		first, last := g[0], g[len(g)-1]
		w("***************" + diff.Eol)
		// "From" side of the hunk: body lines are printed only when the
		// group deletes or replaces something.
		range1 := formatRangeContext(first.I1, last.I2)
		w("*** %s ****%s", range1, diff.Eol)
		for _, c := range g {
			if c.Tag == 'r' || c.Tag == 'd' {
				for _, cc := range g {
					if cc.Tag == 'i' {
						continue
					}
					for _, line := range diff.A[cc.I1:cc.I2] {
						w(prefix[cc.Tag] + line)
					}
				}
				break
			}
		}
		// "To" side of the hunk: body lines are printed only when the
		// group inserts or replaces something.
		range2 := formatRangeContext(first.J1, last.J2)
		w("--- %s ----%s", range2, diff.Eol)
		for _, c := range g {
			if c.Tag == 'r' || c.Tag == 'i' {
				for _, cc := range g {
					if cc.Tag == 'd' {
						continue
					}
					for _, line := range diff.B[cc.J1:cc.J2] {
						w(prefix[cc.Tag] + line)
					}
				}
				break
			}
		}
	}
	return diffErr
}
  697. // Like WriteContextDiff but returns the diff a string.
  698. func GetContextDiffString(diff ContextDiff) (string, error) {
  699. w := &bytes.Buffer{}
  700. err := WriteContextDiff(w, diff)
  701. return string(w.Bytes()), err
  702. }
  703. // Split a string on "\n" while preserving them. The output can be used
  704. // as input for UnifiedDiff and ContextDiff structures.
  705. func SplitLines(s string) []string {
  706. lines := strings.SplitAfter(s, "\n")
  707. lines[len(lines)-1] += "\n"
  708. return lines
  709. }