From d7e6e3100ae4b58c814879853b50a836d3bdea58 Mon Sep 17 00:00:00 2001 From: Andrey Semashev Date: Fri, 5 Jan 2024 02:05:10 +0300 Subject: [PATCH] Added storage preallocation for the target file in copy_file on Linux. Use Linux fallocate system call to preallocate storage for the target file in copy_file backends based on sendfile and copy_file_range. These backends are only used when the file size is known beforehand, and preallocating storage makes it possible to reduce filesystem fragmentation and get an early error if there's not enough free space on the target filesystem. Preallocation is only done as an optimization/hint. On filesystems that do not support it we continue the data copying process as before. This is also why we aren't using posix_fallocate: glibc contains an emulation path that is used when the filesystem doesn't support the functionality. We don't want this emulation, as it would effectively double the amount of written data. --- CMakeLists.txt | 4 +++ build/Jamfile.v2 | 1 + config/Jamfile.v2 | 2 ++ config/has_fallocate.cpp | 19 +++++++++++++ doc/release_history.html | 1 + src/operations.cpp | 59 +++++++++++++++++++++++++++++++++++++--- 6 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 config/has_fallocate.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0bf330466..fa08429a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -48,6 +48,7 @@ endif() check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_fdopendir_nofollow.cpp>" BOOST_FILESYSTEM_HAS_FDOPENDIR_NOFOLLOW) check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_dirent_d_type.cpp>" BOOST_FILESYSTEM_HAS_DIRENT_D_TYPE) check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_posix_at_apis.cpp>" BOOST_FILESYSTEM_HAS_POSIX_AT_APIS) +check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_fallocate.cpp>" BOOST_FILESYSTEM_HAS_FALLOCATE) if(WIN32 AND NOT BOOST_FILESYSTEM_DISABLE_BCRYPT) 
set(CMAKE_REQUIRED_LIBRARIES bcrypt) check_cxx_source_compiles("#include <${CMAKE_CURRENT_SOURCE_DIR}/config/has_bcrypt.cpp>" BOOST_FILESYSTEM_HAS_BCRYPT) @@ -206,6 +207,9 @@ endif() if(BOOST_FILESYSTEM_HAS_POSIX_AT_APIS) target_compile_definitions(boost_filesystem PRIVATE BOOST_FILESYSTEM_HAS_POSIX_AT_APIS) endif() +if(BOOST_FILESYSTEM_HAS_FALLOCATE) + target_compile_definitions(boost_filesystem PRIVATE BOOST_FILESYSTEM_HAS_FALLOCATE) +endif() target_link_libraries(boost_filesystem PUBLIC diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index d2fd77c2d..1fd60271f 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -136,6 +136,7 @@ project boost/filesystem [ check-target-builds ../config//has_fdopendir_nofollow "has fdopendir(O_NOFOLLOW)" : BOOST_FILESYSTEM_HAS_FDOPENDIR_NOFOLLOW ] [ check-target-builds ../config//has_dirent_d_type "has dirent::d_type" : BOOST_FILESYSTEM_HAS_DIRENT_D_TYPE ] [ check-target-builds ../config//has_posix_at_apis "has POSIX *at APIs" : BOOST_FILESYSTEM_HAS_POSIX_AT_APIS ] + [ check-target-builds ../config//has_fallocate "has fallocate" : BOOST_FILESYSTEM_HAS_FALLOCATE ] @check-statx @select-windows-crypto-api @check-cxx20-atomic-ref diff --git a/config/Jamfile.v2 b/config/Jamfile.v2 index 1b362767e..4ca2ced4d 100644 --- a/config/Jamfile.v2 +++ b/config/Jamfile.v2 @@ -33,6 +33,8 @@ obj has_dirent_d_type : has_dirent_d_type.cpp : ../src ; explicit has_dirent_d_type ; obj has_posix_at_apis : has_posix_at_apis.cpp : ../src ; explicit has_posix_at_apis ; +obj has_fallocate : has_fallocate.cpp : ../src ; +explicit has_fallocate ; lib bcrypt ; explicit bcrypt ; diff --git a/config/has_fallocate.cpp b/config/has_fallocate.cpp new file mode 100644 index 000000000..f7ae3739d --- /dev/null +++ b/config/has_fallocate.cpp @@ -0,0 +1,19 @@ +// Copyright 2024 Andrey Semashev + +// Distributed under the Boost Software License, Version 1.0. 
+// See http://www.boost.org/LICENSE_1_0.txt + +// See library home page at http://www.boost.org/libs/filesystem + +#include "platform_config.hpp" + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +int main() +{ + int fd = open("file.txt", O_CREAT | O_TRUNC | O_WRONLY, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH); + int err = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4096); + return err != 0; +} diff --git a/doc/release_history.html b/doc/release_history.html index b9fa090fc..15f2af672 100644 --- a/doc/release_history.html +++ b/doc/release_history.html @@ -45,6 +45,7 @@

1.85.0

  • v4: path::generic_path, path::generic_string, path::make_preferred and path::lexically_normal avoid converting between backslashes and forward slashes in path root names. For example, on Windows, path("\\\\?\\c:\\foo").generic_string() now returns "\\?\c:/foo" instead of "//?/c:/foo". Similarly, path("\\\\host/share/foo/..").lexically_normal() now returns "\\host\share".
  • Added a unique_path overload taking a single error_code& ec argument. The overload generates a unique path using the default path model.
  • weakly_canonical now produces an absolute path if the input path is relative and contains no elements that exist in the filesystem. (#300)
+ • On Linux, copy_file backends based on sendfile and copy_file_range system calls will attempt to preallocate storage for the target file. This may reduce filesystem fragmentation and provide early error indication if there is not enough free space. Not all filesystems support this feature; file copying proceeds if storage preallocation is not supported.
  • 1.84.0

    diff --git a/src/operations.cpp b/src/operations.cpp index bcf660d85..ed5b378e4 100644 --- a/src/operations.cpp +++ b/src/operations.cpp @@ -2,7 +2,7 @@ // Copyright 2002-2009, 2014 Beman Dawes // Copyright 2001 Dietmar Kuehl -// Copyright 2018-2022 Andrey Semashev +// Copyright 2018-2024 Andrey Semashev // Distributed under the Boost Software License, Version 1.0. // See http://www.boost.org/LICENSE_1_0.txt @@ -656,6 +656,57 @@ inline int data_sync(int fd) #endif } +//! Hints the filesystem to opportunistically preallocate storage for a file +inline int preallocate_storage(int file, uintmax_t size) +{ +#if defined(BOOST_FILESYSTEM_HAS_FALLOCATE) + if (BOOST_LIKELY(size > 0 && size <= static_cast< uintmax_t >((std::numeric_limits< off_t >::max)()))) + { + while (true) + { + // Note: We intentionally use fallocate rather than posix_fallocate to avoid + // invoking glibc emulation that writes zeros to the end of the file. + // We want this call to act like a hint to the filesystem and an early + // check for the free storage space. We don't want to write zeros only + // to later overwrite them with the actual data. + int err = fallocate(file, FALLOC_FL_KEEP_SIZE, 0, static_cast< off_t >(size)); + if (BOOST_UNLIKELY(err != 0)) + { + err = errno; + + // Ignore the error if the operation is not supported by the kernel or filesystem + if (err == EOPNOTSUPP || err == ENOSYS) + break; + + if (err == EINTR) + continue; + + return err; + } + + break; + } + } +#endif + + return 0; +} + +//! copy_file implementation wrapper that preallocates storage for the target file +template< typename CopyFileData > +struct copy_file_data_preallocate +{ + //! 
copy_file implementation wrapper that preallocates storage for the target file before invoking the underlying copy implementation + static int impl(int infile, int outfile, uintmax_t size, std::size_t blksize) + { + int err = preallocate_storage(outfile, size); + if (BOOST_UNLIKELY(err != 0)) + return err; + + return CopyFileData::impl(infile, outfile, size, blksize); + } +}; + // Min and max buffer sizes are selected to minimize the overhead from system calls. // The values are picked based on coreutils cp(1) benchmarking data described here: // https://github.com/coreutils/coreutils/blob/d1b0257077c0b0f0ee25087efd46270345d1dd1f/src/ioblksize.h#L23-L72 @@ -864,7 +915,7 @@ struct copy_file_data_copy_file_range if (err == ENOSYS) { #if defined(BOOST_FILESYSTEM_USE_SENDFILE) - filesystem::detail::atomic_store_relaxed(copy_file_data, &check_fs_type< copy_file_data_sendfile >); + filesystem::detail::atomic_store_relaxed(copy_file_data, &check_fs_type< copy_file_data_preallocate< copy_file_data_sendfile > >); goto fallback_to_sendfile; #else filesystem::detail::atomic_store_relaxed(copy_file_data, &copy_file_data_read_write); @@ -941,14 +992,14 @@ inline void init_copy_file_data_impl(unsigned int major_ver, unsigned int minor_ #if defined(BOOST_FILESYSTEM_USE_SENDFILE) // sendfile started accepting file descriptors as the target in Linux 2.6.33 if (major_ver > 2u || (major_ver == 2u && (minor_ver > 6u || (minor_ver == 6u && patch_ver >= 33u)))) - cfd = &check_fs_type< copy_file_data_sendfile >; + cfd = &check_fs_type< copy_file_data_preallocate< copy_file_data_sendfile > >; #endif #if defined(BOOST_FILESYSTEM_USE_COPY_FILE_RANGE) // Although copy_file_range appeared in Linux 4.5, it did not support cross-filesystem copying until 5.3. // copy_file_data_copy_file_range will fallback to copy_file_data_sendfile if copy_file_range returns EXDEV. 
if (major_ver > 4u || (major_ver == 4u && minor_ver >= 5u)) - cfd = &check_fs_type< copy_file_data_copy_file_range >; + cfd = &check_fs_type< copy_file_data_preallocate< copy_file_data_copy_file_range > >; #endif filesystem::detail::atomic_store_relaxed(copy_file_data, cfd);