Skip to content

Commit

Permalink
Added storage preallocation for the target file in copy_file on Linux.
Browse files Browse the repository at this point in the history
Use the Linux fallocate system call to preallocate storage for the target
file in the copy_file backends based on sendfile and copy_file_range. These
backends are only used when the file size is known beforehand, and
preallocating storage helps reduce filesystem fragmentation and
produces an early error if there's not enough free space on the target
filesystem.

Preallocation is only done as an optimization/hint. On filesystems
that do not support it we continue the data copying process as before.
This is why we aren't using posix_fallocate: glibc contains
an emulation path that is used when the filesystem doesn't support
the functionality. We don't want this emulation, as it would effectively
double the amount of written data.
  • Loading branch information
Lastique committed Jan 4, 2024
1 parent cf135d3 commit d03fa94
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 3 deletions.
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ endif()
check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_fdopendir_nofollow.cpp>" BOOST_FILESYSTEM_HAS_FDOPENDIR_NOFOLLOW)
check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_dirent_d_type.cpp>" BOOST_FILESYSTEM_HAS_DIRENT_D_TYPE)
check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_posix_at_apis.cpp>" BOOST_FILESYSTEM_HAS_POSIX_AT_APIS)
check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_fallocate.cpp>" BOOST_FILESYSTEM_HAS_FALLOCATE)
if(WIN32 AND NOT BOOST_FILESYSTEM_DISABLE_BCRYPT)
set(CMAKE_REQUIRED_LIBRARIES bcrypt)
check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_bcrypt.cpp>" BOOST_FILESYSTEM_HAS_BCRYPT)
Expand Down Expand Up @@ -206,6 +207,9 @@ endif()
if(BOOST_FILESYSTEM_HAS_POSIX_AT_APIS)
target_compile_definitions(boost_filesystem PRIVATE BOOST_FILESYSTEM_HAS_POSIX_AT_APIS)
endif()
if(BOOST_FILESYSTEM_HAS_FALLOCATE)
target_compile_definitions(boost_filesystem PRIVATE BOOST_FILESYSTEM_HAS_FALLOCATE)
endif()

target_link_libraries(boost_filesystem
PUBLIC
Expand Down
1 change: 1 addition & 0 deletions build/Jamfile.v2
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ project boost/filesystem
[ check-target-builds ../config//has_fdopendir_nofollow "has fdopendir(O_NOFOLLOW)" : <define>BOOST_FILESYSTEM_HAS_FDOPENDIR_NOFOLLOW ]
[ check-target-builds ../config//has_dirent_d_type "has dirent::d_type" : <define>BOOST_FILESYSTEM_HAS_DIRENT_D_TYPE ]
[ check-target-builds ../config//has_posix_at_apis "has POSIX *at APIs" : <define>BOOST_FILESYSTEM_HAS_POSIX_AT_APIS ]
[ check-target-builds ../config//has_fallocate "has fallocate" : <define>BOOST_FILESYSTEM_HAS_FALLOCATE ]
<conditional>@check-statx
<conditional>@select-windows-crypto-api
<conditional>@check-cxx20-atomic-ref
Expand Down
2 changes: 2 additions & 0 deletions config/Jamfile.v2
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ obj has_dirent_d_type : has_dirent_d_type.cpp : <include>../src ;
explicit has_dirent_d_type ;
obj has_posix_at_apis : has_posix_at_apis.cpp : <include>../src ;
explicit has_posix_at_apis ;
obj has_fallocate : has_fallocate.cpp : <include>../src ;
explicit has_fallocate ;

lib bcrypt ;
explicit bcrypt ;
Expand Down
19 changes: 19 additions & 0 deletions config/has_fallocate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Copyright 2024 Andrey Semashev

// Distributed under the Boost Software License, Version 1.0.
// See http://www.boost.org/LICENSE_1_0.txt

// See library home page at http://www.boost.org/libs/filesystem

#include "platform_config.hpp"

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

// Build configuration probe: this translation unit compiles only if the platform
// headers declare fallocate and FALLOC_FL_KEEP_SIZE.
int main()
{
    const int open_flags = O_CREAT | O_TRUNC | O_WRONLY;
    const int fd = open("file.txt", open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH);

    // Request 4096 bytes of storage starting at offset 0; the exit status is
    // zero only when the call reports success.
    if (fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4096) != 0)
        return 1;

    return 0;
}
1 change: 1 addition & 0 deletions doc/release_history.html
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ <h2>1.85.0</h2>
<li><b>v4:</b> <code>path::generic_path</code>, <code>path::generic_string</code>, <code>path::make_preferred</code> and <code>path::lexically_normal</code> avoid converting between backslashes and forward slashes in path root names. For example, on Windows, <code>path("\\\\?\\c:\\foo").generic_string()</code> now returns "\\?\c:/foo" instead of "//?/c:/foo". Similarly, <code>path("\\\\host/share/foo/..").lexically_normal()</code> now returns "\\host\share".</li>
<li>Added a <code>unique_path</code> overload taking a single <code>error_code&amp; ec</code> argument. The overload generates a unique path using the default path model.</li>
<li><code>weakly_canonical</code> now produces an absolute path if the input path is relative and contains no elements that exist in the filesystem. (<a href="https://github.com/boostorg/filesystem/issues/300">#300</a>)</li>
<li>On Linux, <code>copy_file</code> backends based on <code>sendfile</code> and <code>copy_file_range</code> system calls will attempt to preallocate storage for the target file. This may reduce filesystem fragmentation and provide early error indication if there is not enough free space. Not all filesystems support this feature; file copying proceeds if storage preallocation is not supported.</li>
</ul>

<h2>1.84.0</h2>
Expand Down
54 changes: 51 additions & 3 deletions src/operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,6 +656,54 @@ inline int data_sync(int fd)
#endif
}

//! Opportunistically asks the filesystem to reserve storage for the first \c size bytes of \c file.
//!
//! \param file Descriptor of the file to preallocate storage for.
//! \param size Number of bytes to reserve, counted from the beginning of the file.
//!
//! \returns 0 if storage was reserved, if the request was skipped (zero size, a size that
//!          does not fit in \c off_t, or fallocate not available at build time), or if the
//!          kernel or filesystem does not support the operation. Otherwise, the \c errno
//!          value reported by fallocate.
inline int preallocate_storage(int file, uintmax_t size)
{
#if defined(BOOST_FILESYSTEM_HAS_FALLOCATE)
    // There is nothing to reserve for an empty file, and a size that is not
    // representable as off_t cannot be passed to fallocate.
    const bool size_is_usable = size > 0 && size <= static_cast< uintmax_t >((std::numeric_limits< off_t >::max)());
    if (BOOST_LIKELY(size_is_usable))
    {
        const off_t length = static_cast< off_t >(size);
        for (;;)
        {
            // Note: fallocate is used on purpose instead of posix_fallocate. The latter
            //       may fall back to glibc emulation that writes zeros to the end of the
            //       file, and those zeros would only be overwritten later by the actual
            //       data. Here the call is just a hint to the filesystem and an early
            //       check for free storage space.
            if (BOOST_LIKELY(fallocate(file, FALLOC_FL_KEEP_SIZE, 0, length) == 0))
                break;

            const int error = errno;

            // The call was interrupted, try again
            if (error == EINTR)
                continue;

            // Lack of support in the kernel or the filesystem is not an error for the caller
            if (error == ENOSYS || error == EOPNOTSUPP)
                break;

            return error;
        }
    }
#endif

    return 0;
}

//! Adapter for a copy_file backend that reserves storage for the target file up front.
//! \c CopyFileData must provide a static \c impl function with the same signature as the one below.
template< typename CopyFileData >
struct copy_file_data_preallocate
{
    //! Preallocates \c size bytes for \c outfile, then forwards all arguments to \c CopyFileData::impl.
    //! If preallocation reports an error, that error code is returned and the wrapped backend is not invoked.
    static int impl(int infile, int outfile, uintmax_t size, std::size_t blksize)
    {
        const int prealloc_result = preallocate_storage(outfile, size);
        if (BOOST_UNLIKELY(prealloc_result != 0))
        {
            // Do not start copying when the storage could not be reserved
            return prealloc_result;
        }

        return CopyFileData::impl(infile, outfile, size, blksize);
    }
};

// Min and max buffer sizes are selected to minimize the overhead from system calls.
// The values are picked based on coreutils cp(1) benchmarking data described here:
// https://github.com/coreutils/coreutils/blob/d1b0257077c0b0f0ee25087efd46270345d1dd1f/src/ioblksize.h#L23-L72
Expand Down Expand Up @@ -864,7 +912,7 @@ struct copy_file_data_copy_file_range
if (err == ENOSYS)
{
#if defined(BOOST_FILESYSTEM_USE_SENDFILE)
filesystem::detail::atomic_store_relaxed(copy_file_data, &check_fs_type< copy_file_data_sendfile >);
filesystem::detail::atomic_store_relaxed(copy_file_data, &check_fs_type< copy_file_data_preallocate< copy_file_data_sendfile > >);
goto fallback_to_sendfile;
#else
filesystem::detail::atomic_store_relaxed(copy_file_data, &copy_file_data_read_write);
Expand Down Expand Up @@ -941,14 +989,14 @@ inline void init_copy_file_data_impl(unsigned int major_ver, unsigned int minor_
#if defined(BOOST_FILESYSTEM_USE_SENDFILE)
// sendfile started accepting file descriptors as the target in Linux 2.6.33
if (major_ver > 2u || (major_ver == 2u && (minor_ver > 6u || (minor_ver == 6u && patch_ver >= 33u))))
cfd = &check_fs_type< copy_file_data_sendfile >;
cfd = &check_fs_type< copy_file_data_preallocate< copy_file_data_sendfile > >;
#endif

#if defined(BOOST_FILESYSTEM_USE_COPY_FILE_RANGE)
// Although copy_file_range appeared in Linux 4.5, it did not support cross-filesystem copying until 5.3.
// copy_file_data_copy_file_range will fallback to copy_file_data_sendfile if copy_file_range returns EXDEV.
if (major_ver > 4u || (major_ver == 4u && minor_ver >= 5u))
cfd = &check_fs_type< copy_file_data_copy_file_range >;
cfd = &check_fs_type< copy_file_data_preallocate< copy_file_data_copy_file_range > >;
#endif

filesystem::detail::atomic_store_relaxed(copy_file_data, cfd);
Expand Down

0 comments on commit d03fa94

Please sign in to comment.