From 0945faef5d3973d5d0ffd109322fc4f83919cee5 Mon Sep 17 00:00:00 2001 From: Nitin Garg <113666283+gargnitingoogle@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:30:11 +0000 Subject: [PATCH] [unsupported-object-issue-fix] Add utility for empty directory list issue (#2561) * Add utilities to filter unsupported objects * [testing] shorten code in bucket-handle unit tests * s/isUnsupportedObjectName/IsUnsupportedObjectName * Remove unnecessary functions * IsUnsupportedObjectName -> storageutil package * gcs_util* to unsupported_object_util* Addresses a review comment. --- internal/storage/bucket_handle_test.go | 59 +++++-------- .../storageutil/unsupported_object_util.go | 47 ++++++++++ .../unsupported_object_util_test.go | 85 +++++++++++++++++++ 3 files changed, 151 insertions(+), 40 deletions(-) create mode 100644 internal/storage/storageutil/unsupported_object_util.go create mode 100644 internal/storage/storageutil/unsupported_object_util_test.go diff --git a/internal/storage/bucket_handle_test.go b/internal/storage/bucket_handle_test.go index 671bfb7cfb..68ae584c2e 100644 --- a/internal/storage/bucket_handle_test.go +++ b/internal/storage/bucket_handle_test.go @@ -49,6 +49,16 @@ var ContentDisposition string = "ContentDisposition" // Hence, we are not writing tests for these flows. // https://github.com/GoogleCloudPlatform/gcsfuse/blob/master/vendor/github.com/fsouza/fake-gcs-server/fakestorage/object.go#L515 +func objectsToObjectNames(objects []*gcs.Object) (objectNames []string) { + objectNames = make([]string, len(objects)) + for i, object := range objects { + if object != nil { + objectNames[i] = object.Name + } + } + return +} + type BucketHandleTest struct { suite.Suite bucketHandle *bucketHandle @@ -447,13 +457,9 @@ func (testSuite *BucketHandleTest) TestListObjectMethodWithPrefixObjectExist() { }) assert.Nil(testSuite.T(), err) - assert.Equal(testSuite.T(), 4, len(obj.Objects)) - assert.Equal(testSuite.T(), 1, len(obj.CollapsedRuns)) - assert.Equal(testSuite.T(), TestObjectRootFolderName, obj.Objects[0].Name) - assert.Equal(testSuite.T(), TestObjectSubRootFolderName, obj.Objects[1].Name) - assert.Equal(testSuite.T(), TestObjectName, obj.Objects[2].Name) + assert.Equal(testSuite.T(), []string{TestObjectRootFolderName, TestObjectSubRootFolderName, TestObjectName, TestGzipObjectName}, objectsToObjectNames(obj.Objects)) assert.Equal(testSuite.T(), TestObjectGeneration, obj.Objects[0].Generation) - assert.Equal(testSuite.T(), TestObjectSubRootFolderName, obj.CollapsedRuns[0]) + assert.Equal(testSuite.T(), []string{TestObjectSubRootFolderName}, obj.CollapsedRuns) } func (testSuite *BucketHandleTest) TestListObjectMethodWithPrefixObjectDoesNotExist() { @@ -484,12 +490,8 @@ func (testSuite *BucketHandleTest) TestListObjectMethodWithIncludeTrailingDelimi }) assert.Nil(testSuite.T(), err) - assert.Equal(testSuite.T(), 3, len(obj.Objects)) - assert.Equal(testSuite.T(), 1, len(obj.CollapsedRuns)) - assert.Equal(testSuite.T(), TestObjectRootFolderName, obj.Objects[0].Name) - assert.Equal(testSuite.T(), TestObjectName, obj.Objects[1].Name) - assert.Equal(testSuite.T(), TestGzipObjectName, obj.Objects[2].Name) - assert.Equal(testSuite.T(), TestObjectSubRootFolderName, obj.CollapsedRuns[0]) + assert.Equal(testSuite.T(), []string{TestObjectRootFolderName, TestObjectName, TestGzipObjectName}, objectsToObjectNames(obj.Objects)) + assert.Equal(testSuite.T(), []string{TestObjectSubRootFolderName}, obj.CollapsedRuns) } // If Delimiter is empty, all the objects will appear with same prefix. @@ -505,12 +507,7 @@ func (testSuite *BucketHandleTest) TestListObjectMethodWithEmptyDelimiter() { }) assert.Nil(testSuite.T(), err) - assert.Equal(testSuite.T(), 5, len(obj.Objects)) - assert.Equal(testSuite.T(), TestObjectRootFolderName, obj.Objects[0].Name) - assert.Equal(testSuite.T(), TestObjectSubRootFolderName, obj.Objects[1].Name) - assert.Equal(testSuite.T(), TestSubObjectName, obj.Objects[2].Name) - assert.Equal(testSuite.T(), TestObjectName, obj.Objects[3].Name) - assert.Equal(testSuite.T(), TestGzipObjectName, obj.Objects[4].Name) + assert.Equal(testSuite.T(), []string{TestObjectRootFolderName, TestObjectSubRootFolderName, TestSubObjectName, TestObjectName, TestGzipObjectName}, objectsToObjectNames(obj.Objects)) assert.Equal(testSuite.T(), TestObjectGeneration, obj.Objects[0].Generation) assert.Nil(testSuite.T(), obj.CollapsedRuns) } @@ -539,12 +536,7 @@ func (testSuite *BucketHandleTest) TestListObjectMethodForMaxResult() { // Validate that 5 objects are listed when MaxResults is passed 5. assert.Nil(testSuite.T(), err) - assert.Equal(testSuite.T(), 5, len(fiveObj.Objects)) - assert.Equal(testSuite.T(), TestObjectRootFolderName, fiveObj.Objects[0].Name) - assert.Equal(testSuite.T(), TestObjectSubRootFolderName, fiveObj.Objects[1].Name) - assert.Equal(testSuite.T(), TestSubObjectName, fiveObj.Objects[2].Name) - assert.Equal(testSuite.T(), TestObjectName, fiveObj.Objects[3].Name) - assert.Equal(testSuite.T(), TestGzipObjectName, fiveObj.Objects[4].Name) + assert.Equal(testSuite.T(), []string{TestObjectRootFolderName, TestObjectSubRootFolderName, TestSubObjectName, TestObjectName, TestGzipObjectName}, objectsToObjectNames(fiveObj.Objects)) assert.Nil(testSuite.T(), fiveObj.CollapsedRuns) // Note: The behavior is different in real GCS storage JSON API. In real API, @@ -554,10 +546,7 @@ func (testSuite *BucketHandleTest) TestListObjectMethodForMaxResult() { // This is because fake storage doesn'testSuite support pagination and hence maxResults // has no affect. assert.Nil(testSuite.T(), err2) - assert.Equal(testSuite.T(), 3, len(threeObj.Objects)) - assert.Equal(testSuite.T(), TestObjectRootFolderName, threeObj.Objects[0].Name) - assert.Equal(testSuite.T(), TestObjectName, threeObj.Objects[1].Name) - assert.Equal(testSuite.T(), TestGzipObjectName, threeObj.Objects[2].Name) + assert.Equal(testSuite.T(), []string{TestObjectRootFolderName, TestObjectName, TestGzipObjectName}, objectsToObjectNames(threeObj.Objects)) assert.Equal(testSuite.T(), 1, len(threeObj.CollapsedRuns)) } @@ -586,12 +575,7 @@ func (testSuite *BucketHandleTest) TestListObjectMethodWithMissingMaxResult() { // Validate that all objects (5) are listed when MaxResults is not passed. assert.Nil(testSuite.T(), err2) - assert.Equal(testSuite.T(), 5, len(fiveObjWithoutMaxResults.Objects)) - assert.Equal(testSuite.T(), TestObjectRootFolderName, fiveObjWithoutMaxResults.Objects[0].Name) - assert.Equal(testSuite.T(), TestObjectSubRootFolderName, fiveObjWithoutMaxResults.Objects[1].Name) - assert.Equal(testSuite.T(), TestSubObjectName, fiveObjWithoutMaxResults.Objects[2].Name) - assert.Equal(testSuite.T(), TestObjectName, fiveObjWithoutMaxResults.Objects[3].Name) - assert.Equal(testSuite.T(), TestGzipObjectName, fiveObjWithoutMaxResults.Objects[4].Name) + assert.Equal(testSuite.T(), []string{TestObjectRootFolderName, TestObjectSubRootFolderName, TestSubObjectName, TestObjectName, TestGzipObjectName}, objectsToObjectNames(fiveObjWithoutMaxResults.Objects)) assert.Nil(testSuite.T(), fiveObjWithoutMaxResults.CollapsedRuns) } @@ -622,12 +606,7 @@ func (testSuite *BucketHandleTest) TestListObjectMethodWithZeroMaxResult() { // Validate that all objects (5) are listed when MaxResults is 0. This has // same behavior as not passing MaxResults in request. assert.Nil(testSuite.T(), err2) - assert.Equal(testSuite.T(), 5, len(fiveObjWithZeroMaxResults.Objects)) - assert.Equal(testSuite.T(), TestObjectRootFolderName, fiveObjWithZeroMaxResults.Objects[0].Name) - assert.Equal(testSuite.T(), TestObjectSubRootFolderName, fiveObjWithZeroMaxResults.Objects[1].Name) - assert.Equal(testSuite.T(), TestSubObjectName, fiveObjWithZeroMaxResults.Objects[2].Name) - assert.Equal(testSuite.T(), TestObjectName, fiveObjWithZeroMaxResults.Objects[3].Name) - assert.Equal(testSuite.T(), TestGzipObjectName, fiveObjWithZeroMaxResults.Objects[4].Name) + assert.Equal(testSuite.T(), []string{TestObjectRootFolderName, TestObjectSubRootFolderName, TestSubObjectName, TestObjectName, TestGzipObjectName}, objectsToObjectNames(fiveObjWithZeroMaxResults.Objects)) assert.Nil(testSuite.T(), fiveObjWithZeroMaxResults.CollapsedRuns) } diff --git a/internal/storage/storageutil/unsupported_object_util.go b/internal/storage/storageutil/unsupported_object_util.go new file mode 100644 index 0000000000..87cc80464b --- /dev/null +++ b/internal/storage/storageutil/unsupported_object_util.go @@ -0,0 +1,47 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storageutil + +import ( + "strings" +) + +var ( + unsupportedObjectNameSubstrings = []string{"//"} + unsupportedObjectNamePrefixes = []string{"/"} + unsupportedObjectNames = []string{""} +) + +// IsUnsupportedObjectName returns true if the passed +// string is a valid GCS object name or prefix, +// which is unsupported in GCSFuse. +func IsUnsupportedObjectName(name string) bool { + for _, substring := range unsupportedObjectNameSubstrings { + if strings.Contains(name, substring) { + return true + } + } + for _, prefix := range unsupportedObjectNamePrefixes { + if strings.HasPrefix(name, prefix) { + return true + } + } + for _, unsupportedObjectName := range unsupportedObjectNames { + if name == unsupportedObjectName { + return true + } + } + return false +} diff --git a/internal/storage/storageutil/unsupported_object_util_test.go b/internal/storage/storageutil/unsupported_object_util_test.go new file mode 100644 index 0000000000..f23344ae49 --- /dev/null +++ b/internal/storage/storageutil/unsupported_object_util_test.go @@ -0,0 +1,85 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storageutil_test + +import ( + "fmt" + "testing" + + . "github.com/googlecloudplatform/gcsfuse/v2/internal/storage/storageutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/suite" +) + +//////////////////////////////////////////////////////////////////////// +// Boilerplate +//////////////////////////////////////////////////////////////////////// + +type GcsUtilTest struct { + suite.Suite +} + +func TestGcsUtil(t *testing.T) { + suite.Run(t, new(GcsUtilTest)) +} + +// ////////////////////////////////////////////////////////////////////// +// Tests +// ////////////////////////////////////////////////////////////////////// +func (ts *GcsUtilTest) TestIsUnsupportedObjectName() { + cases := []struct { + name string + isUnsupported bool + }{ + { + name: "foo", + isUnsupported: false, + }, + { + name: "foo/bar", + isUnsupported: false, + }, + { + name: "foo//bar", + isUnsupported: true, + }, + { + name: "abc/", + isUnsupported: false, + }, + { + name: "abc//", + isUnsupported: true, + }, + { + name: "/foo", + isUnsupported: true, + }, + { + name: "/", + isUnsupported: true, + }, + { + name: "", + isUnsupported: true, + }, + } + + for _, tc := range cases { + ts.Run(fmt.Sprintf("name=%s", tc.name), func() { + assert.Equal(ts.T(), tc.isUnsupported, IsUnsupportedObjectName(tc.name)) + }) + } +}