From 0a511de994d6e2d29b1d628fea9bacace88b720e Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Mon, 30 Dec 2019 15:43:12 -0800 Subject: [PATCH] remove adapters, create wrappers package diff now has a simple, single-call API. for more control, people can use the subpackages. Part of #18. --- adapter.go | 103 -------------------------- cmd/pkg-diff-example/main.go | 47 ++---------- diff.go | 135 +++++++++++++++++++++++++++++++++++ doc.go | 10 --- example_test.go | 70 ++++++++---------- todo.go | 31 ++++++++ write/unified_test.go | 14 +++- 7 files changed, 211 insertions(+), 199 deletions(-) delete mode 100644 adapter.go delete mode 100644 doc.go create mode 100644 todo.go diff --git a/adapter.go b/adapter.go deleted file mode 100644 index 64a5c95..0000000 --- a/adapter.go +++ /dev/null @@ -1,103 +0,0 @@ -package diff - -import ( - "bytes" - "fmt" - "io" - "reflect" - - "github.com/pkg/diff/myers" - "github.com/pkg/diff/write" -) - -// DiffWrite is the union of myers.Pair and write.Pair: -// It can be diffed using myers diff, and written in unified diff format. -type DiffWrite interface { - myers.Pair - write.Pair -} - -// Strings returns a DiffWrite that can diff and write a and b. -func Strings(a, b []string) DiffWrite { - return &diffStrings{a: a, b: b} -} - -type diffStrings struct { - a, b []string -} - -func (ab *diffStrings) LenA() int { return len(ab.a) } -func (ab *diffStrings) LenB() int { return len(ab.b) } -func (ab *diffStrings) Equal(ai, bi int) bool { return ab.a[ai] == ab.b[bi] } -func (ab *diffStrings) WriteATo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.a[i]) } -func (ab *diffStrings) WriteBTo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.b[i]) } - -// Bytes returns a DiffWrite that can diff and write a and b. -func Bytes(a, b [][]byte) DiffWrite { - return &diffBytes{a: a, b: b} -} - -type diffBytes struct { - a, b [][]byte -} - -func (ab *diffBytes) LenA() int { return len(ab.a) } -func (ab *diffBytes) LenB() int { return len(ab.b) } -func (ab *diffBytes) Equal(ai, bi int) bool { return bytes.Equal(ab.a[ai], ab.b[bi]) } -func (ab *diffBytes) WriteATo(w io.Writer, i int) (int, error) { return w.Write(ab.a[i]) } -func (ab *diffBytes) WriteBTo(w io.Writer, i int) (int, error) { return w.Write(ab.b[i]) } - -// Slices returns a DiffWrite that diffs a and b. -// It uses fmt.Print to print the elements of a and b. -// It uses equal to compare elements of a and b; -// if equal is nil, Slices uses reflect.DeepEqual. -func Slices(a, b interface{}, equal func(x, y interface{}) bool) DiffWrite { - if equal == nil { - equal = reflect.DeepEqual - } - ab := &diffSlices{a: reflect.ValueOf(a), b: reflect.ValueOf(b), eq: equal} - if ab.a.Type().Kind() != reflect.Slice || ab.b.Type().Kind() != reflect.Slice { - panic(fmt.Errorf("diff.Slices called with a non-slice argument: %T, %T", a, b)) - } - return ab -} - -type diffSlices struct { - a, b reflect.Value - eq func(x, y interface{}) bool -} - -func (ab *diffSlices) LenA() int { return ab.a.Len() } -func (ab *diffSlices) LenB() int { return ab.b.Len() } -func (ab *diffSlices) atA(i int) interface{} { return ab.a.Index(i).Interface() } -func (ab *diffSlices) atB(i int) interface{} { return ab.b.Index(i).Interface() } -func (ab *diffSlices) Equal(ai, bi int) bool { return ab.eq(ab.atA(ai), ab.atB(bi)) } -func (ab *diffSlices) WriteATo(w io.Writer, i int) (int, error) { return fmt.Fprint(w, ab.atA(i)) } -func (ab *diffSlices) WriteBTo(w io.Writer, i int) (int, error) { return fmt.Fprint(w, ab.atB(i)) } - -// TODO: consider adding a LargeFile wrapper. -// It should read each file once, storing the location of all newlines in each file, -// probably using a compact, delta-based encoding. -// Then Seek/ReadAt to read each line lazily as needed, relying on the OS page cache for performance. -// This will allow diffing giant files with low memory use, at a significant time cost. -// An alternative is to mmap the files, although this is OS-specific and can be fiddly. - -// TODO: consider adding a StringIntern type, something like: -// -// type StringIntern struct { -// s map[string]*string -// } -// -// func (i *StringIntern) Bytes(b []byte) *string -// func (i *StringIntern) String(s string) *string -// -// And document what it is and why to use it. -// And consider adding helper functions to Strings and Bytes to use it. -// The reason to use it is that a lot of the execution time in diffing -// (which is an expensive operation) is taken up doing string comparisons. -// If you have paid the O(n) cost to intern all strings involved in both A and B, -// then string comparisons are reduced to cheap pointer comparisons. - -// TODO: consider adding an "it just works" test helper that accepts two slices (via interface{}), -// diffs them using Strings or Bytes or Slices (using reflect.DeepEqual) as appropriate, -// and calls t.Errorf with a generated diff if they're not equal. diff --git a/cmd/pkg-diff-example/main.go b/cmd/pkg-diff-example/main.go index 14d4ffb..b68042d 100644 --- a/cmd/pkg-diff-example/main.go +++ b/cmd/pkg-diff-example/main.go @@ -3,24 +3,16 @@ package main import ( - "bufio" - "context" "flag" "fmt" "log" "os" "github.com/pkg/diff" - "github.com/pkg/diff/ctxt" - "github.com/pkg/diff/myers" "github.com/pkg/diff/write" ) -var ( - color = flag.Bool("color", false, "colorize the output") - timeout = flag.Duration("timeout", 0, "timeout") - unified = flag.Int("unified", 3, "lines of unified context") -) +var color = flag.Bool("color", false, "colorize the output") // check logs a fatal error and exits if err is not nil. func check(err error) { @@ -29,22 +21,6 @@ func check(err error) { } } -// fileLines returns the lines of the file called name. -func fileLines(name string) ([]string, error) { - f, err := os.Open(name) - if err != nil { - return nil, err - } - defer f.Close() - - var lines []string - s := bufio.NewScanner(f) - for s.Scan() { - lines = append(lines, s.Text()) - } - return lines, s.Err() -} - func usage() { fmt.Fprintf(os.Stderr, "pkg-diff-example [flags] file1 file2\n") flag.PrintDefaults() @@ -62,28 +38,13 @@ func main() { } aName := flag.Arg(0) - aLines, err := fileLines(aName) - check(err) - bName := flag.Arg(1) - bLines, err := fileLines(bName) - check(err) - ab := diff.Strings(aLines, bLines) - ctx := context.Background() - if *timeout != 0 { - var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, *timeout) - defer cancel() - } - e := myers.Diff(ctx, ab) - e = ctxt.Size(e, *unified) // limit amount of output context - opts := []write.Option{ - write.Names(aName, bName), - } + var opts []write.Option if *color { opts = append(opts, write.TerminalColor()) } - _, err = write.Unified(e, os.Stdout, ab, opts...) + + err := diff.Text(aName, bName, nil, nil, os.Stdout, opts...) check(err) } diff --git a/diff.go b/diff.go index f8689a2..c44c2a7 100644 --- a/diff.go +++ b/diff.go @@ -1 +1,136 @@ +// Package diff and its subpackages create, modify, and print diffs. +// +// Package diff contains high level routines that generate a textual diff. +// It is implemented in terms of the diff/* subpackages. +// If you want fine-grained control, +// want to inspect an diff programmatically, +// want to provide a context for cancellation, +// need to diff gigantic files that don't fit in memory, +// or want to diff unusual things, +// use the subpackages. package diff + +import ( + "bufio" + "bytes" + "context" + "fmt" + "io" + "os" + "reflect" + "strings" + + "github.com/pkg/diff/ctxt" + "github.com/pkg/diff/myers" + "github.com/pkg/diff/write" +) + +// lines returns the lines contained in text/filename. +// text and filename are interpreted as described in the docs for Text. +func lines(filename string, text interface{}) ([]string, error) { + var r io.Reader + switch text := text.(type) { + case nil: + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer f.Close() + r = f + case string: + r = strings.NewReader(text) + case []byte: + r = bytes.NewReader(text) + case io.Reader: + r = text + default: + return nil, fmt.Errorf("unexpected type %T, want string, []byte, io.Reader, or nil", text) + } + var x []string + scan := bufio.NewScanner(r) + for scan.Scan() { + // TODO: intern? See intern comment in todo.go. + x = append(x, scan.Text()) + } + return x, scan.Err() +} + +// addNames adds a Names write.Option using aName and bName, +// taking care to put it at the end, +// so as not to overwrite any competing option. +func addNames(aName, bName string, options []write.Option) []write.Option { + opts := make([]write.Option, len(options)+1) + opts[0] = write.Names(aName, bName) + copy(opts[1:], options) + return opts +} + +// Text diffs a and b and writes the result to w. +// It treats a and b as text, and splits them into lines using bufio.ScanLines. +// aFile and bFile are filenames to use in the output. +// a and b each may be nil or may have type string, []byte, or io.Reader. +// If nil, the text is read from the filename. +func Text(aFile, bFile string, a, b interface{}, w io.Writer, options ...write.Option) error { + aLines, err := lines(aFile, a) + if err != nil { + return err + } + bLines, err := lines(bFile, b) + if err != nil { + return err + } + ab := &diffStrings{a: aLines, b: bLines} + s := myers.Diff(context.Background(), ab) + s = ctxt.Size(s, 3) + opts := addNames(aFile, bFile, options) + _, err = write.Unified(s, w, ab, opts...) + return err +} + +type diffStrings struct { + a, b []string +} + +func (ab *diffStrings) LenA() int { return len(ab.a) } +func (ab *diffStrings) LenB() int { return len(ab.b) } +func (ab *diffStrings) Equal(ai, bi int) bool { return ab.a[ai] == ab.b[bi] } +func (ab *diffStrings) WriteATo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.a[i]) } +func (ab *diffStrings) WriteBTo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.b[i]) } + +// Slices diffs slices a and b and writes the result to w. +// It uses fmt.Print to print the elements of a and b. +// It uses reflect.DeepEqual to compare elements of a and b. +// It uses aName and bName as the names of a and b in the output. +func Slices(aName, bName string, a, b interface{}, w io.Writer, options ...write.Option) error { + ab := &diffSlices{a: reflect.ValueOf(a), b: reflect.ValueOf(b)} + if err := ab.validateTypes(); err != nil { + return err + } + s := myers.Diff(context.Background(), ab) + s = ctxt.Size(s, 3) + opts := addNames(aName, bName, options) + _, err := write.Unified(s, w, ab, opts...) + return err +} + +type diffSlices struct { + a, b reflect.Value +} + +func (ab *diffSlices) LenA() int { return ab.a.Len() } +func (ab *diffSlices) LenB() int { return ab.b.Len() } +func (ab *diffSlices) atA(i int) interface{} { return ab.a.Index(i).Interface() } +func (ab *diffSlices) atB(i int) interface{} { return ab.b.Index(i).Interface() } +func (ab *diffSlices) Equal(ai, bi int) bool { return reflect.DeepEqual(ab.atA(ai), ab.atB(bi)) } +func (ab *diffSlices) WriteATo(w io.Writer, i int) (int, error) { return fmt.Fprint(w, ab.atA(i)) } +func (ab *diffSlices) WriteBTo(w io.Writer, i int) (int, error) { return fmt.Fprint(w, ab.atB(i)) } + +func (ab *diffSlices) validateTypes() error { + if t := ab.a.Type(); t.Kind() != reflect.Slice { + return fmt.Errorf("a has type %v, must be a slice", t) + } + if t := ab.b.Type(); t.Kind() != reflect.Slice { + return fmt.Errorf("b has type %v, must be a slice", t) + } + return nil +} diff --git a/doc.go b/doc.go deleted file mode 100644 index 1004526..0000000 --- a/doc.go +++ /dev/null @@ -1,10 +0,0 @@ -/* - -Package diff creates and prints diffs. - -It is UNSTABLE. The API will change and there are bugs. - -TODO: more docs. But what, exactly? - -*/ -package diff diff --git a/example_test.go b/example_test.go index 2197b92..72571a7 100644 --- a/example_test.go +++ b/example_test.go @@ -1,42 +1,46 @@ package diff_test import ( - "context" "os" "github.com/pkg/diff" - "github.com/pkg/diff/ctxt" - "github.com/pkg/diff/myers" - "github.com/pkg/diff/write" ) -// TODO: use a less heavyweight output format for Example_testHelper - -func Example_testHelper() { - want := []int{1, 2, 3, 4, 5} - got := []int{1, 2, 4, 5} - ab := diff.Slices(want, got, nil) - e := myers.Diff(context.Background(), ab) - if e.IsIdentity() { - return +func Example_Slices() { + want := []int{1, 2, 3, 4, 5, 6, 7, 8, 9} + got := []int{1, 2, 3, 4, 6, 7, 8, 9} + err := diff.Slices("want", "got", want, got, os.Stdout) + if err != nil { + panic(err) } - e = ctxt.Size(e, 1) - write.Unified(e, os.Stdout, ab) // Output: - // --- a - // +++ b - // @@ -2,3 +2,2 @@ + // --- want + // +++ got + // @@ -2,7 +2,6 @@ // 2 - // -3 + // 3 // 4 + // -5 + // 6 + // 7 + // 8 } -func Example_strings() { - a := []string{"a", "b", "c"} - b := []string{"a", "c", "d"} - ab := diff.Strings(a, b) - e := myers.Diff(context.Background(), ab) - write.Unified(e, os.Stdout, ab) +func Example_Text() { + a := ` +a +b +c +`[1:] + b := ` +a +c +d +`[1:] + err := diff.Text("a", "b", a, b, os.Stdout) + if err != nil { + panic(err) + } // Output: // --- a // +++ b @@ -46,19 +50,3 @@ func Example_strings() { // c // +d } - -func Example_Names() { - a := []string{"a", "b", "c"} - b := []string{"a", "c", "d"} - ab := diff.Strings(a, b) - e := myers.Diff(context.Background(), ab) - write.Unified(e, os.Stdout, ab, write.Names("before", "after")) - // Output: - // --- before - // +++ after - // @@ -1,3 +1,3 @@ - // a - // -b - // c - // +d -} diff --git a/todo.go b/todo.go new file mode 100644 index 0000000..e586220 --- /dev/null +++ b/todo.go @@ -0,0 +1,31 @@ +package diff + +// TODO: add a package for diffing gigantic files. +// Instead of reading the entire thing into memory, we could +// scan through the file once, storing the location of all newlines in each file. +// Then Seek/ReadAt to read each line lazily as needed, +// relying on the OS page cache for performance. +// This will allow diffing giant files with low memory use, +// albeit at a some time cost. +// An alternative is to mmap the files, +// although this is OS-specific and can be fiddly. + +// TODO: add a package providing a StringIntern type, something like: +// +// type StringIntern struct { +// s map[string]*string +// } +// +// func (i *StringIntern) Bytes(b []byte) *string +// func (i *StringIntern) String(s string) *string +// +// And document what it is and why to use it. +// And consider adding helper functions to Strings and Bytes to use it. +// The reason to use it is that a lot of the execution time in diffing +// (which is an expensive operation) is taken up doing string comparisons. +// If you have paid the O(n) cost to intern all strings involved in both A and B, +// then string comparisons are reduced to cheap pointer comparisons. + +// TODO: consider adding an "it just works" test helper that accepts two slices (via interface{}), +// diffs them using Strings or Bytes or Slices (using reflect.DeepEqual) as appropriate, +// and calls t.Errorf with a generated diff if they're not equal. diff --git a/write/unified_test.go b/write/unified_test.go index b765864..4814798 100644 --- a/write/unified_test.go +++ b/write/unified_test.go @@ -3,10 +3,10 @@ package write_test import ( "bytes" "context" + "io" "strings" "testing" - "github.com/pkg/diff" "github.com/pkg/diff/ctxt" "github.com/pkg/diff/myers" "github.com/pkg/diff/write" @@ -79,7 +79,7 @@ func TestGolden(t *testing.T) { t.Run(test.name, func(t *testing.T) { as := strings.Split(test.a, "\n") bs := strings.Split(test.b, "\n") - ab := diff.Strings(as, bs) + ab := &diffStrings{a: as, b: bs} // TODO: supply an edit.Script to the tests instead doing a Myers diff here. // Doing it as I have done, the lazy way, mixes concerns: diff algorithm vs unification algorithm // vs unified diff formatting. @@ -102,3 +102,13 @@ func TestGolden(t *testing.T) { }) } } + +type diffStrings struct { + a, b []string +} + +func (ab *diffStrings) LenA() int { return len(ab.a) } +func (ab *diffStrings) LenB() int { return len(ab.b) } +func (ab *diffStrings) Equal(ai, bi int) bool { return ab.a[ai] == ab.b[bi] } +func (ab *diffStrings) WriteATo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.a[i]) } +func (ab *diffStrings) WriteBTo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.b[i]) }