From 0a511de994d6e2d29b1d628fea9bacace88b720e Mon Sep 17 00:00:00 2001
From: Josh Bleecher Snyder <josharian@gmail.com>
Date: Mon, 30 Dec 2019 15:43:12 -0800
Subject: [PATCH] remove adapters, create wrappers

package diff now has a simple, single-call API.
for more control, people can use the subpackages.

Part of #18.
---
 adapter.go                   | 103 --------------------------
 cmd/pkg-diff-example/main.go |  47 ++----------
 diff.go                      | 135 +++++++++++++++++++++++++++++++++++
 doc.go                       |  10 ---
 example_test.go              |  70 ++++++++----------
 todo.go                      |  31 ++++++++
 write/unified_test.go        |  14 +++-
 7 files changed, 211 insertions(+), 199 deletions(-)
 delete mode 100644 adapter.go
 delete mode 100644 doc.go
 create mode 100644 todo.go

diff --git a/adapter.go b/adapter.go
deleted file mode 100644
index 64a5c95..0000000
--- a/adapter.go
+++ /dev/null
@@ -1,103 +0,0 @@
-package diff
-
-import (
-	"bytes"
-	"fmt"
-	"io"
-	"reflect"
-
-	"github.com/pkg/diff/myers"
-	"github.com/pkg/diff/write"
-)
-
-// DiffWrite is the union of myers.Pair and write.Pair:
-// It can be diffed using myers diff, and written in unified diff format.
-type DiffWrite interface {
-	myers.Pair
-	write.Pair
-}
-
-// Strings returns a DiffWrite that can diff and write a and b.
-func Strings(a, b []string) DiffWrite {
-	return &diffStrings{a: a, b: b}
-}
-
-type diffStrings struct {
-	a, b []string
-}
-
-func (ab *diffStrings) LenA() int                                { return len(ab.a) }
-func (ab *diffStrings) LenB() int                                { return len(ab.b) }
-func (ab *diffStrings) Equal(ai, bi int) bool                    { return ab.a[ai] == ab.b[bi] }
-func (ab *diffStrings) WriteATo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.a[i]) }
-func (ab *diffStrings) WriteBTo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.b[i]) }
-
-// Bytes returns a DiffWrite that can diff and write a and b.
-func Bytes(a, b [][]byte) DiffWrite {
-	return &diffBytes{a: a, b: b}
-}
-
-type diffBytes struct {
-	a, b [][]byte
-}
-
-func (ab *diffBytes) LenA() int                                { return len(ab.a) }
-func (ab *diffBytes) LenB() int                                { return len(ab.b) }
-func (ab *diffBytes) Equal(ai, bi int) bool                    { return bytes.Equal(ab.a[ai], ab.b[bi]) }
-func (ab *diffBytes) WriteATo(w io.Writer, i int) (int, error) { return w.Write(ab.a[i]) }
-func (ab *diffBytes) WriteBTo(w io.Writer, i int) (int, error) { return w.Write(ab.b[i]) }
-
-// Slices returns a DiffWrite that diffs a and b.
-// It uses fmt.Print to print the elements of a and b.
-// It uses equal to compare elements of a and b;
-// if equal is nil, Slices uses reflect.DeepEqual.
-func Slices(a, b interface{}, equal func(x, y interface{}) bool) DiffWrite {
-	if equal == nil {
-		equal = reflect.DeepEqual
-	}
-	ab := &diffSlices{a: reflect.ValueOf(a), b: reflect.ValueOf(b), eq: equal}
-	if ab.a.Type().Kind() != reflect.Slice || ab.b.Type().Kind() != reflect.Slice {
-		panic(fmt.Errorf("diff.Slices called with a non-slice argument: %T, %T", a, b))
-	}
-	return ab
-}
-
-type diffSlices struct {
-	a, b reflect.Value
-	eq   func(x, y interface{}) bool
-}
-
-func (ab *diffSlices) LenA() int                                { return ab.a.Len() }
-func (ab *diffSlices) LenB() int                                { return ab.b.Len() }
-func (ab *diffSlices) atA(i int) interface{}                    { return ab.a.Index(i).Interface() }
-func (ab *diffSlices) atB(i int) interface{}                    { return ab.b.Index(i).Interface() }
-func (ab *diffSlices) Equal(ai, bi int) bool                    { return ab.eq(ab.atA(ai), ab.atB(bi)) }
-func (ab *diffSlices) WriteATo(w io.Writer, i int) (int, error) { return fmt.Fprint(w, ab.atA(i)) }
-func (ab *diffSlices) WriteBTo(w io.Writer, i int) (int, error) { return fmt.Fprint(w, ab.atB(i)) }
-
-// TODO: consider adding a LargeFile wrapper.
-// It should read each file once, storing the location of all newlines in each file,
-// probably using a compact, delta-based encoding.
-// Then Seek/ReadAt to read each line lazily as needed, relying on the OS page cache for performance.
-// This will allow diffing giant files with low memory use, at a significant time cost.
-// An alternative is to mmap the files, although this is OS-specific and can be fiddly.
-
-// TODO: consider adding a StringIntern type, something like:
-//
-// type StringIntern struct {
-// 	s map[string]*string
-// }
-//
-// func (i *StringIntern) Bytes(b []byte) *string
-// func (i *StringIntern) String(s string) *string
-//
-// And document what it is and why to use it.
-// And consider adding helper functions to Strings and Bytes to use it.
-// The reason to use it is that a lot of the execution time in diffing
-// (which is an expensive operation) is taken up doing string comparisons.
-// If you have paid the O(n) cost to intern all strings involved in both A and B,
-// then string comparisons are reduced to cheap pointer comparisons.
-
-// TODO: consider adding an "it just works" test helper that accepts two slices (via interface{}),
-// diffs them using Strings or Bytes or Slices (using reflect.DeepEqual) as appropriate,
-// and calls t.Errorf with a generated diff if they're not equal.
diff --git a/cmd/pkg-diff-example/main.go b/cmd/pkg-diff-example/main.go
index 14d4ffb..b68042d 100644
--- a/cmd/pkg-diff-example/main.go
+++ b/cmd/pkg-diff-example/main.go
@@ -3,24 +3,16 @@
 package main
 
 import (
-	"bufio"
-	"context"
 	"flag"
 	"fmt"
 	"log"
 	"os"
 
 	"github.com/pkg/diff"
-	"github.com/pkg/diff/ctxt"
-	"github.com/pkg/diff/myers"
 	"github.com/pkg/diff/write"
 )
 
-var (
-	color   = flag.Bool("color", false, "colorize the output")
-	timeout = flag.Duration("timeout", 0, "timeout")
-	unified = flag.Int("unified", 3, "lines of unified context")
-)
+var color = flag.Bool("color", false, "colorize the output")
 
 // check logs a fatal error and exits if err is not nil.
 func check(err error) {
@@ -29,22 +21,6 @@ func check(err error) {
 	}
 }
 
-// fileLines returns the lines of the file called name.
-func fileLines(name string) ([]string, error) {
-	f, err := os.Open(name)
-	if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	var lines []string
-	s := bufio.NewScanner(f)
-	for s.Scan() {
-		lines = append(lines, s.Text())
-	}
-	return lines, s.Err()
-}
-
 func usage() {
 	fmt.Fprintf(os.Stderr, "pkg-diff-example [flags] file1 file2\n")
 	flag.PrintDefaults()
@@ -62,28 +38,13 @@ func main() {
 	}
 
 	aName := flag.Arg(0)
-	aLines, err := fileLines(aName)
-	check(err)
-
 	bName := flag.Arg(1)
-	bLines, err := fileLines(bName)
-	check(err)
 
-	ab := diff.Strings(aLines, bLines)
-	ctx := context.Background()
-	if *timeout != 0 {
-		var cancel context.CancelFunc
-		ctx, cancel = context.WithTimeout(ctx, *timeout)
-		defer cancel()
-	}
-	e := myers.Diff(ctx, ab)
-	e = ctxt.Size(e, *unified) // limit amount of output context
-	opts := []write.Option{
-		write.Names(aName, bName),
-	}
+	var opts []write.Option
 	if *color {
 		opts = append(opts, write.TerminalColor())
 	}
-	_, err = write.Unified(e, os.Stdout, ab, opts...)
+
+	err := diff.Text(aName, bName, nil, nil, os.Stdout, opts...)
 	check(err)
 }
diff --git a/diff.go b/diff.go
index f8689a2..c44c2a7 100644
--- a/diff.go
+++ b/diff.go
@@ -1 +1,136 @@
+// Package diff and its subpackages create, modify, and print diffs.
+//
+// Package diff contains high level routines that generate a textual diff.
+// It is implemented in terms of the diff/* subpackages.
+// If you want fine-grained control,
+// want to inspect an diff programmatically,
+// want to provide a context for cancellation,
+// need to diff gigantic files that don't fit in memory,
+// or want to diff unusual things,
+// use the subpackages.
 package diff
+
+import (
+	"bufio"
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"reflect"
+	"strings"
+
+	"github.com/pkg/diff/ctxt"
+	"github.com/pkg/diff/myers"
+	"github.com/pkg/diff/write"
+)
+
+// lines returns the lines contained in text/filename.
+// text and filename are interpreted as described in the docs for Text.
+func lines(filename string, text interface{}) ([]string, error) {
+	var r io.Reader
+	switch text := text.(type) {
+	case nil:
+		f, err := os.Open(filename)
+		if err != nil {
+			return nil, err
+		}
+		defer f.Close()
+		r = f
+	case string:
+		r = strings.NewReader(text)
+	case []byte:
+		r = bytes.NewReader(text)
+	case io.Reader:
+		r = text
+	default:
+		return nil, fmt.Errorf("unexpected type %T, want string, []byte, io.Reader, or nil", text)
+	}
+	var x []string
+	scan := bufio.NewScanner(r)
+	for scan.Scan() {
+		// TODO: intern? See intern comment in todo.go.
+		x = append(x, scan.Text())
+	}
+	return x, scan.Err()
+}
+
+// addNames adds a Names write.Option using aName and bName,
+// taking care to put it at the end,
+// so as not to overwrite any competing option.
+func addNames(aName, bName string, options []write.Option) []write.Option {
+	opts := make([]write.Option, len(options)+1)
+	opts[0] = write.Names(aName, bName)
+	copy(opts[1:], options)
+	return opts
+}
+
+// Text diffs a and b and writes the result to w.
+// It treats a and b as text, and splits them into lines using bufio.ScanLines.
+// aFile and bFile are filenames to use in the output.
+// a and b each may be nil or may have type string, []byte, or io.Reader.
+// If nil, the text is read from the filename.
+func Text(aFile, bFile string, a, b interface{}, w io.Writer, options ...write.Option) error {
+	aLines, err := lines(aFile, a)
+	if err != nil {
+		return err
+	}
+	bLines, err := lines(bFile, b)
+	if err != nil {
+		return err
+	}
+	ab := &diffStrings{a: aLines, b: bLines}
+	s := myers.Diff(context.Background(), ab)
+	s = ctxt.Size(s, 3)
+	opts := addNames(aFile, bFile, options)
+	_, err = write.Unified(s, w, ab, opts...)
+	return err
+}
+
+type diffStrings struct {
+	a, b []string
+}
+
+func (ab *diffStrings) LenA() int                                { return len(ab.a) }
+func (ab *diffStrings) LenB() int                                { return len(ab.b) }
+func (ab *diffStrings) Equal(ai, bi int) bool                    { return ab.a[ai] == ab.b[bi] }
+func (ab *diffStrings) WriteATo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.a[i]) }
+func (ab *diffStrings) WriteBTo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.b[i]) }
+
+// Slices diffs slices a and b and writes the result to w.
+// It uses fmt.Print to print the elements of a and b.
+// It uses reflect.DeepEqual to compare elements of a and b.
+// It uses aName and bName as the names of a and b in the output.
+func Slices(aName, bName string, a, b interface{}, w io.Writer, options ...write.Option) error {
+	ab := &diffSlices{a: reflect.ValueOf(a), b: reflect.ValueOf(b)}
+	if err := ab.validateTypes(); err != nil {
+		return err
+	}
+	s := myers.Diff(context.Background(), ab)
+	s = ctxt.Size(s, 3)
+	opts := addNames(aName, bName, options)
+	_, err := write.Unified(s, w, ab, opts...)
+	return err
+}
+
+type diffSlices struct {
+	a, b reflect.Value
+}
+
+func (ab *diffSlices) LenA() int                                { return ab.a.Len() }
+func (ab *diffSlices) LenB() int                                { return ab.b.Len() }
+func (ab *diffSlices) atA(i int) interface{}                    { return ab.a.Index(i).Interface() }
+func (ab *diffSlices) atB(i int) interface{}                    { return ab.b.Index(i).Interface() }
+func (ab *diffSlices) Equal(ai, bi int) bool                    { return reflect.DeepEqual(ab.atA(ai), ab.atB(bi)) }
+func (ab *diffSlices) WriteATo(w io.Writer, i int) (int, error) { return fmt.Fprint(w, ab.atA(i)) }
+func (ab *diffSlices) WriteBTo(w io.Writer, i int) (int, error) { return fmt.Fprint(w, ab.atB(i)) }
+
+func (ab *diffSlices) validateTypes() error {
+	if t := ab.a.Type(); t.Kind() != reflect.Slice {
+		return fmt.Errorf("a has type %v, must be a slice", t)
+	}
+	if t := ab.b.Type(); t.Kind() != reflect.Slice {
+		return fmt.Errorf("b has type %v, must be a slice", t)
+	}
+	return nil
+}
diff --git a/doc.go b/doc.go
deleted file mode 100644
index 1004526..0000000
--- a/doc.go
+++ /dev/null
@@ -1,10 +0,0 @@
-/*
-
-Package diff creates and prints diffs.
-
-It is UNSTABLE. The API will change and there are bugs.
-
-TODO: more docs. But what, exactly?
-
-*/
-package diff
diff --git a/example_test.go b/example_test.go
index 2197b92..72571a7 100644
--- a/example_test.go
+++ b/example_test.go
@@ -1,42 +1,46 @@
 package diff_test
 
 import (
-	"context"
 	"os"
 
 	"github.com/pkg/diff"
-	"github.com/pkg/diff/ctxt"
-	"github.com/pkg/diff/myers"
-	"github.com/pkg/diff/write"
 )
 
-// TODO: use a less heavyweight output format for Example_testHelper
-
-func Example_testHelper() {
-	want := []int{1, 2, 3, 4, 5}
-	got := []int{1, 2, 4, 5}
-	ab := diff.Slices(want, got, nil)
-	e := myers.Diff(context.Background(), ab)
-	if e.IsIdentity() {
-		return
+func Example_Slices() {
+	want := []int{1, 2, 3, 4, 5, 6, 7, 8, 9}
+	got := []int{1, 2, 3, 4, 6, 7, 8, 9}
+	err := diff.Slices("want", "got", want, got, os.Stdout)
+	if err != nil {
+		panic(err)
 	}
-	e = ctxt.Size(e, 1)
-	write.Unified(e, os.Stdout, ab)
 	// Output:
-	// --- a
-	// +++ b
-	// @@ -2,3 +2,2 @@
+	// --- want
+	// +++ got
+	// @@ -2,7 +2,6 @@
 	//  2
-	// -3
+	//  3
 	//  4
+	// -5
+	//  6
+	//  7
+	//  8
 }
 
-func Example_strings() {
-	a := []string{"a", "b", "c"}
-	b := []string{"a", "c", "d"}
-	ab := diff.Strings(a, b)
-	e := myers.Diff(context.Background(), ab)
-	write.Unified(e, os.Stdout, ab)
+func Example_Text() {
+	a := `
+a
+b
+c
+`[1:]
+	b := `
+a
+c
+d
+`[1:]
+	err := diff.Text("a", "b", a, b, os.Stdout)
+	if err != nil {
+		panic(err)
+	}
 	// Output:
 	// --- a
 	// +++ b
@@ -46,19 +50,3 @@ func Example_strings() {
 	//  c
 	// +d
 }
-
-func Example_Names() {
-	a := []string{"a", "b", "c"}
-	b := []string{"a", "c", "d"}
-	ab := diff.Strings(a, b)
-	e := myers.Diff(context.Background(), ab)
-	write.Unified(e, os.Stdout, ab, write.Names("before", "after"))
-	// Output:
-	// --- before
-	// +++ after
-	// @@ -1,3 +1,3 @@
-	//  a
-	// -b
-	//  c
-	// +d
-}
diff --git a/todo.go b/todo.go
new file mode 100644
index 0000000..e586220
--- /dev/null
+++ b/todo.go
@@ -0,0 +1,31 @@
+package diff
+
+// TODO: add a package for diffing gigantic files.
+// Instead of reading the entire thing into memory, we could
+// scan through the file once, storing the location of all newlines in each file.
+// Then Seek/ReadAt to read each line lazily as needed,
+// relying on the OS page cache for performance.
+// This will allow diffing giant files with low memory use,
+// albeit at a some time cost.
+// An alternative is to mmap the files,
+// although this is OS-specific and can be fiddly.
+
+// TODO: add a package providing a StringIntern type, something like:
+//
+// type StringIntern struct {
+// 	s map[string]*string
+// }
+//
+// func (i *StringIntern) Bytes(b []byte) *string
+// func (i *StringIntern) String(s string) *string
+//
+// And document what it is and why to use it.
+// And consider adding helper functions to Strings and Bytes to use it.
+// The reason to use it is that a lot of the execution time in diffing
+// (which is an expensive operation) is taken up doing string comparisons.
+// If you have paid the O(n) cost to intern all strings involved in both A and B,
+// then string comparisons are reduced to cheap pointer comparisons.
+
+// TODO: consider adding an "it just works" test helper that accepts two slices (via interface{}),
+// diffs them using Strings or Bytes or Slices (using reflect.DeepEqual) as appropriate,
+// and calls t.Errorf with a generated diff if they're not equal.
diff --git a/write/unified_test.go b/write/unified_test.go
index b765864..4814798 100644
--- a/write/unified_test.go
+++ b/write/unified_test.go
@@ -3,10 +3,10 @@ package write_test
 import (
 	"bytes"
 	"context"
+	"io"
 	"strings"
 	"testing"
 
-	"github.com/pkg/diff"
 	"github.com/pkg/diff/ctxt"
 	"github.com/pkg/diff/myers"
 	"github.com/pkg/diff/write"
@@ -79,7 +79,7 @@ func TestGolden(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			as := strings.Split(test.a, "\n")
 			bs := strings.Split(test.b, "\n")
-			ab := diff.Strings(as, bs)
+			ab := &diffStrings{a: as, b: bs}
 			// TODO: supply an edit.Script to the tests instead doing a Myers diff here.
 			// Doing it as I have done, the lazy way, mixes concerns: diff algorithm vs unification algorithm
 			// vs unified diff formatting.
@@ -102,3 +102,13 @@ func TestGolden(t *testing.T) {
 		})
 	}
 }
+
+type diffStrings struct {
+	a, b []string
+}
+
+func (ab *diffStrings) LenA() int                                { return len(ab.a) }
+func (ab *diffStrings) LenB() int                                { return len(ab.b) }
+func (ab *diffStrings) Equal(ai, bi int) bool                    { return ab.a[ai] == ab.b[bi] }
+func (ab *diffStrings) WriteATo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.a[i]) }
+func (ab *diffStrings) WriteBTo(w io.Writer, i int) (int, error) { return io.WriteString(w, ab.b[i]) }