From 39e467aa533004f6abceb99acba8837e880c8fbc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miloslav=20Trma=C4=8D?=
Date: Fri, 27 Sep 2024 19:47:25 +0200
Subject: [PATCH] Use tar-split/tar/asm.IterateHeaders now that it has been
 accepted
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

... instead of our version which makes assumptions on the internal
decisions of the tar-split project, and needs heuristics to guess where
file padding ends.

Signed-off-by: Miloslav Trmač
---
 pkg/chunked/compression_linux.go    |   6 +-
 pkg/chunked/tar_split_linux.go      |  68 ------------------
 pkg/chunked/tar_split_linux_test.go | 104 ----------------------------
 3 files changed, 5 insertions(+), 173 deletions(-)
 delete mode 100644 pkg/chunked/tar_split_linux.go
 delete mode 100644 pkg/chunked/tar_split_linux_test.go

diff --git a/pkg/chunked/compression_linux.go b/pkg/chunked/compression_linux.go
index 633740a280..e660e2a376 100644
--- a/pkg/chunked/compression_linux.go
+++ b/pkg/chunked/compression_linux.go
@@ -2,6 +2,7 @@ package chunked
 
 import (
 	archivetar "archive/tar"
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
@@ -14,6 +15,8 @@ import (
 	"github.com/klauspost/pgzip"
 	digest "github.com/opencontainers/go-digest"
 	"github.com/vbatts/tar-split/archive/tar"
+	"github.com/vbatts/tar-split/tar/asm"
+	"github.com/vbatts/tar-split/tar/storage"
 	expMaps "golang.org/x/exp/maps"
 )
 
@@ -256,7 +259,8 @@ func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
 		}
 	}
 
-	if err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error {
+	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
+	if err := asm.IterateHeaders(unpacker, func(hdr *tar.Header) error {
 		e, ok := pendingFiles[hdr.Name]
 		if !ok {
 			return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name)
diff --git a/pkg/chunked/tar_split_linux.go b/pkg/chunked/tar_split_linux.go
deleted file mode 100644
index aeb1698db2..0000000000
--- a/pkg/chunked/tar_split_linux.go
+++ /dev/null
@@ -1,68 +0,0 @@
-package chunked
-
-import (
-	"bytes"
-	"fmt"
-	"io"
-
-	"github.com/vbatts/tar-split/archive/tar"
-	"github.com/vbatts/tar-split/tar/storage"
-)
-
-// iterateTarSplit calls handler for each tar header in tarSplit
-func iterateTarSplit(tarSplit []byte, handler func(hdr *tar.Header) error) error {
-	// This, strictly speaking, hard-codes undocumented assumptions about how github.com/vbatts/tar-split/tar/asm.NewInputTarStream
-	// forms the tar-split contents. Pragmatically, NewInputTarStream should always produce storage.FileType entries at least
-	// for every non-empty file, which constraints it basically to the output we expect.
-	//
-	// Specifically, we assume:
-	// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions
-	// - (There is a FileType entry for every tar header, we ignore it)
-	// - Trailing padding of a file, if any, is included in the next SegmentType entry
-	// - At the end, there may be SegmentType entries just for the terminating zero blocks.
-
-	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
-	for {
-		tsEntry, err := unpacker.Next()
-		if err != nil {
-			if err == io.EOF {
-				return nil
-			}
-			return fmt.Errorf("reading tar-split entries: %w", err)
-		}
-		switch tsEntry.Type {
-		case storage.SegmentType:
-			payload := tsEntry.Payload
-			// This is horrible, but we don’t know how much padding to skip. (It can be computed from the previous hdr.Size for non-sparse
-			// files, but for sparse files that is set to the logical size.)
-			//
-			// First, assume that all padding is zero bytes.
-			// A tar header starts with a file name, which might in principle be empty, but
-			// at least https://github.com/opencontainers/image-spec/blob/main/layer.md#populate-initial-filesystem suggests that
-			// the tar name should never be empty (it should be ".", or maybe "./").
-			//
-			// This will cause us to skip all zero bytes in the trailing blocks, but that’s fine.
-			i := 0
-			for i < len(payload) && payload[i] == 0 {
-				i++
-			}
-			payload = payload[i:]
-			tr := tar.NewReader(bytes.NewReader(payload))
-			hdr, err := tr.Next()
-			if err != nil {
-				if err == io.EOF { // Probably the last entry, but let’s let the unpacker drive that.
-					break
-				}
-				return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err)
-			}
-			if err := handler(hdr); err != nil {
-				return err
-			}
-
-		case storage.FileType:
-			// Nothing
-		default:
-			return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type)
-		}
-	}
-}
diff --git a/pkg/chunked/tar_split_linux_test.go b/pkg/chunked/tar_split_linux_test.go
deleted file mode 100644
index d180d33b7e..0000000000
--- a/pkg/chunked/tar_split_linux_test.go
+++ /dev/null
@@ -1,104 +0,0 @@
-package chunked
-
-import (
-	"bytes"
-	"fmt"
-	"io"
-	"testing"
-	"time"
-
-	"github.com/containers/storage/pkg/chunked/internal"
-	"github.com/stretchr/testify/assert"
-	"github.com/stretchr/testify/require"
-	"github.com/vbatts/tar-split/archive/tar"
-	"github.com/vbatts/tar-split/tar/asm"
-	"github.com/vbatts/tar-split/tar/storage"
-)
-
-func createTestTarheader(index int, typeFlag byte, size int64) tar.Header {
-	n := (index + 1) * 100 // Use predictable, but distinct, values for all headers
-
-	res := tar.Header{
-		Typeflag:   typeFlag,
-		Name:       fmt.Sprintf("name%d", n),
-		Size:       size,
-		Mode:       int64(n + 1),
-		Uid:        n + 2,
-		Gid:        n + 3,
-		Uname:      fmt.Sprintf("user%d", n),
-		Gname:      fmt.Sprintf("group%d", n),
-		ModTime:    time.Unix(int64(n+4), 0),
-		AccessTime: time.Unix(int64(n+5), 0),
-		ChangeTime: time.Unix(int64(n+6), 0),
-		PAXRecords: map[string]string{fmt.Sprintf("key%d", n): fmt.Sprintf("value%d", n)},
-		Format:     tar.FormatPAX, // We must set a format, in the default one AccessTime and ChangeTime are discarded.
-	}
-	switch res.Typeflag {
-	case tar.TypeLink, tar.TypeSymlink:
-		res.Linkname = fmt.Sprintf("link%d", n)
-	case tar.TypeChar, tar.TypeBlock:
-		res.Devmajor = int64(n + 7)
-		res.Devminor = int64(n + 8)
-	}
-	return res
-}
-
-func TestIterateTarSplit(t *testing.T) {
-	entries := []struct {
-		typeFlag byte
-		size     int64
-	}{
-		{tar.TypeReg, 0},
-		{tar.TypeReg, 1},
-		{tar.TypeReg, 511},
-		{tar.TypeReg, 512},
-		{tar.TypeReg, 513},
-		{tar.TypeLink, 0},
-		{tar.TypeSymlink, 0},
-		{tar.TypeChar, 0},
-		{tar.TypeBlock, 0},
-		{tar.TypeDir, 0},
-		{tar.TypeFifo, 0},
-	}
-
-	var tarball bytes.Buffer
-	var expected []tar.Header
-	w := tar.NewWriter(&tarball)
-	for i, e := range entries {
-		hdr := createTestTarheader(i, e.typeFlag, e.size)
-		err := w.WriteHeader(&hdr)
-		require.NoError(t, err)
-		data := make([]byte, e.size)
-		_, err = w.Write(data)
-		require.NoError(t, err)
-		expected = append(expected, hdr)
-	}
-	err := w.Close()
-	require.NoError(t, err)
-
-	var tarSplit bytes.Buffer
-	tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
-	require.NoError(t, err)
-	_, err = io.Copy(io.Discard, tsReader)
-	require.NoError(t, err)
-
-	var actual []tar.Header
-	err = iterateTarSplit(tarSplit.Bytes(), func(hdr *tar.Header) error {
-		actual = append(actual, *hdr)
-		return nil
-	})
-	require.NoError(t, err)
-
-	assert.Equal(t, len(expected), len(actual))
-	for i := range expected {
-		// We would have to open-code an equality comparison of time.Time values; instead, convert to FileMetadata,
-		// because we already have that implemented for that type — and because it provides a tiny bit of code coverage
-		// testing for ensureFileMetadataAttributesMatch.
-		expected1, err := internal.NewFileMetadata(&expected[i])
-		require.NoError(t, err, i)
-		actual1, err := internal.NewFileMetadata(&actual[i])
-		require.NoError(t, err, i)
-		err = ensureFileMetadataAttributesMatch(&expected1, &actual1)
-		assert.NoError(t, err, i)
-	}
-}
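
Note (not part of the patch): below is a minimal, self-contained sketch of the iteration
pattern this change adopts, for readers who have not used asm.IterateHeaders before.
The package name "example" and the helper listTarSplitNames are illustrative only; the
patch itself only rewrites ensureTOCMatchesTarSplit in pkg/chunked/compression_linux.go.
The sketch assumes tarSplit holds an uncompressed tar-split JSON stream, as in the hunk
above, and requires a tar-split version that ships asm.IterateHeaders.

package example // illustrative only; not part of pkg/chunked or this patch

import (
	"bytes"
	"fmt"

	"github.com/vbatts/tar-split/archive/tar"
	"github.com/vbatts/tar-split/tar/asm"
	"github.com/vbatts/tar-split/tar/storage"
)

// listTarSplitNames (hypothetical helper) walks the tar headers recorded in a
// tar-split blob and returns the entry names. asm.IterateHeaders takes care of
// decoding headers and skipping padding, which is what the removed
// iterateTarSplit had to approximate with heuristics.
func listTarSplitNames(tarSplit []byte) ([]string, error) {
	var names []string
	unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
	if err := asm.IterateHeaders(unpacker, func(hdr *tar.Header) error {
		names = append(names, hdr.Name)
		return nil
	}); err != nil {
		return nil, fmt.Errorf("iterating tar-split entries: %w", err)
	}
	return names, nil
}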