From 2587d67a48df3306019fe9dc7f70cb6c7ca75480 Mon Sep 17 00:00:00 2001 From: Arthur LAURENT Date: Thu, 31 Oct 2024 13:28:55 +0100 Subject: [PATCH 1/9] enable usage of constexpr and inline constexpr for constants when C++ version is >= 11 --- src/pugixml.hpp | 83 +++++++++++++++++++++++++++++++------------------ 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/src/pugixml.hpp b/src/pugixml.hpp index f2d985e0..1712623f 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -136,6 +136,27 @@ # endif #endif +// If C++ is 2011 or higher, add 'constexpr' qualifiers +#ifndef PUGIXML_CONSTEXPR +# if __cplusplus >= 201103 +# define PUGIXML_CONSTEXPR constexpr +# elif defined(_MSC_VER) && _MSC_VER >= 1910 +# define PUGIXML_CONSTEXPR constexpr +# else +# define PUGIXML_CONSTEXPR const +# endif +#endif + +// If C++ is 2017 or higher, add 'inline' qualifiers for constants +// required for C++20 module +#ifndef PUGIXML_CONSTANT +# if __cplusplus >= 201703 +# define PUGIXML_CONSTANT inline PUGIXML_CONSTEXPR +# else +# define PUGIXML_CONSTANT PUGIXML_CONSTEXPR +# endif +#endif + // Character interface macros #ifdef PUGIXML_WCHAR_MODE # define PUGIXML_TEXT(t) L ## t @@ -182,69 +203,69 @@ namespace pugi // Minimal parsing mode (equivalent to turning all other flags off). // Only elements and PCDATA sections are added to the DOM tree, no text conversions are performed. - const unsigned int parse_minimal = 0x0000; + PUGIXML_CONSTANT unsigned int parse_minimal = 0x0000; // This flag determines if processing instructions (node_pi) are added to the DOM tree. This flag is off by default. - const unsigned int parse_pi = 0x0001; + PUGIXML_CONSTANT unsigned int parse_pi = 0x0001; // This flag determines if comments (node_comment) are added to the DOM tree. This flag is off by default. - const unsigned int parse_comments = 0x0002; + PUGIXML_CONSTANT unsigned int parse_comments = 0x0002; // This flag determines if CDATA sections (node_cdata) are added to the DOM tree. 
This flag is on by default. - const unsigned int parse_cdata = 0x0004; + PUGIXML_CONSTANT unsigned int parse_cdata = 0x0004; // This flag determines if plain character data (node_pcdata) that consist only of whitespace are added to the DOM tree. // This flag is off by default; turning it on usually results in slower parsing and more memory consumption. - const unsigned int parse_ws_pcdata = 0x0008; + PUGIXML_CONSTANT unsigned int parse_ws_pcdata = 0x0008; // This flag determines if character and entity references are expanded during parsing. This flag is on by default. - const unsigned int parse_escapes = 0x0010; + PUGIXML_CONSTANT unsigned int parse_escapes = 0x0010; // This flag determines if EOL characters are normalized (converted to #xA) during parsing. This flag is on by default. - const unsigned int parse_eol = 0x0020; + PUGIXML_CONSTANT unsigned int parse_eol = 0x0020; // This flag determines if attribute values are normalized using CDATA normalization rules during parsing. This flag is on by default. - const unsigned int parse_wconv_attribute = 0x0040; + PUGIXML_CONSTANT unsigned int parse_wconv_attribute = 0x0040; // This flag determines if attribute values are normalized using NMTOKENS normalization rules during parsing. This flag is off by default. - const unsigned int parse_wnorm_attribute = 0x0080; + PUGIXML_CONSTANT unsigned int parse_wnorm_attribute = 0x0080; // This flag determines if document declaration (node_declaration) is added to the DOM tree. This flag is off by default. - const unsigned int parse_declaration = 0x0100; + PUGIXML_CONSTANT unsigned int parse_declaration = 0x0100; // This flag determines if document type declaration (node_doctype) is added to the DOM tree. This flag is off by default. 
- const unsigned int parse_doctype = 0x0200; + PUGIXML_CONSTANT unsigned int parse_doctype = 0x0200; // This flag determines if plain character data (node_pcdata) that is the only child of the parent node and that consists only // of whitespace is added to the DOM tree. // This flag is off by default; turning it on may result in slower parsing and more memory consumption. - const unsigned int parse_ws_pcdata_single = 0x0400; + PUGIXML_CONSTANT unsigned int parse_ws_pcdata_single = 0x0400; // This flag determines if leading and trailing whitespace is to be removed from plain character data. This flag is off by default. - const unsigned int parse_trim_pcdata = 0x0800; + PUGIXML_CONSTANT unsigned int parse_trim_pcdata = 0x0800; // This flag determines if plain character data that does not have a parent node is added to the DOM tree, and if an empty document // is a valid document. This flag is off by default. - const unsigned int parse_fragment = 0x1000; + PUGIXML_CONSTANT unsigned int parse_fragment = 0x1000; // This flag determines if plain character data is be stored in the parent element's value. This significantly changes the structure of // the document; this flag is only recommended for parsing documents with many PCDATA nodes in memory-constrained environments. // This flag is off by default. - const unsigned int parse_embed_pcdata = 0x2000; + PUGIXML_CONSTANT unsigned int parse_embed_pcdata = 0x2000; // This flag determines whether determines whether the the two pcdata should be merged or not, if no intermediatory data are parsed in the document. // This flag is off by default. - const unsigned int parse_merge_pcdata = 0x4000; + PUGIXML_CONSTANT unsigned int parse_merge_pcdata = 0x4000; // The default parsing mode. // Elements, PCDATA and CDATA sections are added to the DOM tree, character/reference entities are expanded, // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. 
- const unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; + PUGIXML_CONSTANT unsigned int parse_default = parse_cdata | parse_escapes | parse_wconv_attribute | parse_eol; // The full parsing mode. // Nodes of all types are added to the DOM tree, character/reference entities are expanded, // End-of-Line characters are normalized, attribute values are normalized using CDATA normalization rules. - const unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype; + PUGIXML_CONSTANT unsigned int parse_full = parse_default | parse_pi | parse_comments | parse_declaration | parse_doctype; // These flags determine the encoding of input data for XML document enum xml_encoding @@ -264,41 +285,41 @@ namespace pugi // Formatting flags // Indent the nodes that are written to output stream with as many indentation strings as deep the node is in DOM tree. This flag is on by default. - const unsigned int format_indent = 0x01; + PUGIXML_CONSTANT unsigned int format_indent = 0x01; // Write encoding-specific BOM to the output stream. This flag is off by default. - const unsigned int format_write_bom = 0x02; + PUGIXML_CONSTANT unsigned int format_write_bom = 0x02; // Use raw output mode (no indentation and no line breaks are written). This flag is off by default. - const unsigned int format_raw = 0x04; + PUGIXML_CONSTANT unsigned int format_raw = 0x04; // Omit default XML declaration even if there is no declaration in the document. This flag is off by default. - const unsigned int format_no_declaration = 0x08; + PUGIXML_CONSTANT unsigned int format_no_declaration = 0x08; // Don't escape attribute values and PCDATA contents. This flag is off by default. - const unsigned int format_no_escapes = 0x10; + PUGIXML_CONSTANT unsigned int format_no_escapes = 0x10; // Open file using text mode in xml_document::save_file. This enables special character (i.e. new-line) conversions on some systems. 
This flag is off by default. - const unsigned int format_save_file_text = 0x20; + PUGIXML_CONSTANT unsigned int format_save_file_text = 0x20; // Write every attribute on a new line with appropriate indentation. This flag is off by default. - const unsigned int format_indent_attributes = 0x40; + PUGIXML_CONSTANT unsigned int format_indent_attributes = 0x40; // Don't output empty element tags, instead writing an explicit start and end tag even if there are no children. This flag is off by default. - const unsigned int format_no_empty_element_tags = 0x80; + PUGIXML_CONSTANT unsigned int format_no_empty_element_tags = 0x80; // Skip characters belonging to range [0; 32) instead of "&#xNN;" encoding. This flag is off by default. - const unsigned int format_skip_control_chars = 0x100; + PUGIXML_CONSTANT unsigned int format_skip_control_chars = 0x100; // Use single quotes ' instead of double quotes " for enclosing attribute values. This flag is off by default. - const unsigned int format_attribute_single_quote = 0x200; + PUGIXML_CONSTANT unsigned int format_attribute_single_quote = 0x200; // The default set of formatting flags. // Nodes are indented depending on their depth in DOM tree, a default declaration is output if document has none. 
- const unsigned int format_default = format_indent; + PUGIXML_CONSTANT unsigned int format_default = format_indent; - const int default_double_precision = 17; - const int default_float_precision = 9; + PUGIXML_CONSTANT int default_double_precision = 17; + PUGIXML_CONSTANT int default_float_precision = 9; // Forward declarations struct xml_attribute_struct; From bcf8b86026f33717c6a76e5c51576510376ba72a Mon Sep 17 00:00:00 2001 From: Arthur LAURENT Date: Thu, 31 Oct 2024 14:26:39 +0100 Subject: [PATCH 2/9] add support for C++20 module (and C++23 std module) --- src/pugixml.cpp | 48 +++++++++++++++++--------------- src/pugixml.cppm | 71 ++++++++++++++++++++++++++++++++++++++++++++++++ src/pugixml.hpp | 40 ++++++++++++++++----------- 3 files changed, 121 insertions(+), 38 deletions(-) create mode 100644 src/pugixml.cppm diff --git a/src/pugixml.cpp b/src/pugixml.cpp index b84658d2..6fbba89a 100644 --- a/src/pugixml.cpp +++ b/src/pugixml.cpp @@ -16,33 +16,35 @@ #include "pugixml.hpp" -#include -#include -#include -#include -#include - -#ifdef PUGIXML_WCHAR_MODE -# include -#endif +#ifndef PUGIXML_EXPORT_MODULE +# include +# include +# include +# include +# include + +# ifdef PUGIXML_WCHAR_MODE +# include +# endif -#ifndef PUGIXML_NO_XPATH -# include -# include -#endif +# ifndef PUGIXML_NO_XPATH +# include +# include +# endif -#ifndef PUGIXML_NO_STL -# include -# include -# include -#endif +# ifndef PUGIXML_NO_STL +# include +# include +# include +# endif // For placement new -#include +# include // For load_file -#if defined(__linux__) || defined(__APPLE__) -#include +# if defined(__linux__) || defined(__APPLE__) +# include +# endif #endif #ifdef _MSC_VER @@ -196,7 +198,9 @@ namespace pugi typedef unsigned __int32 uint32_t; } #else -# include +# ifndef PUGIXML_EXPORT_MODULE +# include +# endif #endif // Memory allocation diff --git a/src/pugixml.cppm b/src/pugixml.cppm new file mode 100644 index 00000000..c314f2b2 --- /dev/null +++ b/src/pugixml.cppm @@ -0,0 +1,71 @@ 
+/** + * pugixml parser - version 1.14 + * -------------------------------------------------------- + * Copyright (C) 2006-2024, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at https://pugixml.org/ + * + * This library is distributed under the MIT License. See notice at the end + * of this file. + * + * This work is based on the pugxml parser, which is: + * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) + */ + +module; + +#define PUGIXML_EXPORT_MODULE + +#include + +#ifndef PUGIXML_USE_STD_MODULE +# include +# include +# include +# include +# include +# include +# include +#endif + +#include +#include +#include +#include +#include +#include +#include + +#ifdef PUGIXML_WCHAR_MODE +# include +#endif + +#ifndef PUGIXML_NO_XPATH +# include +# include +#endif + +#if defined(__linux__) || defined(__APPLE__) +# include +#endif + +export module pugixml; + +#ifdef PUGIXML_USE_STD_MODULE +import std.compat; +#endif + +#define PUGIXML_MODULE_EXPORT export +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Winclude-angled-in-module-purview" +#endif +#include +#if defined(__clang__) +# pragma clang diagnostic pop +#endif + + +module :private; + +#define PUGIXML_SOURCE "pugixml.cpp" +#include PUGIXML_SOURCE diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 1712623f..eb2abff4 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -23,19 +23,21 @@ #ifndef HEADER_PUGIXML_HPP #define HEADER_PUGIXML_HPP +#ifndef PUGIXML_EXPORT_MODULE // Include stddef.h for size_t and ptrdiff_t -#include +# include // Include exception header for XPath -#if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS) -# include -#endif +# if !defined(PUGIXML_NO_XPATH) && !defined(PUGIXML_NO_EXCEPTIONS) +# include +# endif // Include STL headers -#ifndef PUGIXML_NO_STL -# include -# include -# include +# ifndef PUGIXML_NO_STL +# include +# include +# include +# endif #endif // Check if 
std::string_view is both requested and available @@ -48,8 +50,10 @@ #endif // Include string_view if appropriate -#ifdef PUGIXML_HAS_STRING_VIEW -# include +#ifndef PUGIXML_MODULE_EXPORT +# ifdef PUGIXML_HAS_STRING_VIEW +# include +# endif #endif // Macro for deprecated features @@ -150,11 +154,11 @@ // If C++ is 2017 or higher, add 'inline' qualifiers for constants // required for C++20 module #ifndef PUGIXML_CONSTANT -# if __cplusplus >= 201703 -# define PUGIXML_CONSTANT inline PUGIXML_CONSTEXPR -# else -# define PUGIXML_CONSTANT PUGIXML_CONSTEXPR -# endif +# if __cplusplus >= 201703 +# define PUGIXML_CONSTANT inline PUGIXML_CONSTEXPR +# else +# define PUGIXML_CONSTANT PUGIXML_CONSTEXPR +# endif #endif // Character interface macros @@ -166,6 +170,10 @@ # define PUGIXML_CHAR char #endif +#ifndef PUGIXML_MODULE_EXPORT +# define PUGIXML_MODULE_EXPORT +#endif + namespace pugi { // Character type used for all internal storage and operations; depends on PUGIXML_WCHAR_MODE @@ -183,7 +191,7 @@ namespace pugi } // The PugiXML namespace -namespace pugi +PUGIXML_MODULE_EXPORT namespace pugi { // Tree node types enum xml_node_type From 33df3e263b05615d9a3da1ee36407c9db92406f2 Mon Sep 17 00:00:00 2001 From: Arthur LAURENT Date: Thu, 31 Oct 2024 17:59:40 +0100 Subject: [PATCH 3/9] rename PUGIXML_CONSTEXPR macro to PUGIXML_CONSTEXPR11 --- src/pugixml.hpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/pugixml.hpp b/src/pugixml.hpp index eb2abff4..86db4cb3 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -141,13 +141,13 @@ #endif // If C++ is 2011 or higher, add 'constexpr' qualifiers -#ifndef PUGIXML_CONSTEXPR +#ifndef PUGIXML_CONSTEXPR11 # if __cplusplus >= 201103 -# define PUGIXML_CONSTEXPR constexpr +# define PUGIXML_CONSTEXPR11 constexpr # elif defined(_MSC_VER) && _MSC_VER >= 1910 -# define PUGIXML_CONSTEXPR constexpr +# define PUGIXML_CONSTEXPR11 constexpr # else -# define PUGIXML_CONSTEXPR const +# define PUGIXML_CONSTEXPR11 # 
endif #endif @@ -155,10 +155,12 @@ // required for C++20 module #ifndef PUGIXML_CONSTANT # if __cplusplus >= 201703 -# define PUGIXML_CONSTANT inline PUGIXML_CONSTEXPR +# define PUGIXML_CONSTANT inline PUGIXML_CONSTEXPR11 +# elif __cplusplus >= 201103 +# define PUGIXML_CONSTANT PUGIXML_CONSTEXPR11 # else -# define PUGIXML_CONSTANT PUGIXML_CONSTEXPR -# endif +# define PUGIXML_CONSTANT const +#endif #endif // Character interface macros From 2535c8960d7fcb5aae7c282d7e0507c6dfa693f7 Mon Sep 17 00:00:00 2001 From: Arthur LAURENT Date: Thu, 31 Oct 2024 18:00:49 +0100 Subject: [PATCH 4/9] fix inclusion of string_view when module is enabled --- src/pugixml.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 86db4cb3..f7c8ed9a 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -50,7 +50,7 @@ #endif // Include string_view if appropriate -#ifndef PUGIXML_MODULE_EXPORT +#ifndef PUGIXML_EXPORT_MODULE # ifdef PUGIXML_HAS_STRING_VIEW # include # endif From 0716ca1619c3bef5e3f33857997f724ccda85e4d Mon Sep 17 00:00:00 2001 From: Arthur LAURENT Date: Thu, 31 Oct 2024 18:03:20 +0100 Subject: [PATCH 5/9] improve some comments --- src/pugixml.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pugixml.hpp b/src/pugixml.hpp index f7c8ed9a..6d483699 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -140,7 +140,6 @@ # endif #endif -// If C++ is 2011 or higher, add 'constexpr' qualifiers #ifndef PUGIXML_CONSTEXPR11 # if __cplusplus >= 201103 # define PUGIXML_CONSTEXPR11 constexpr @@ -151,7 +150,8 @@ # endif #endif -// If C++ is 2017 or higher, add 'inline' qualifiers for constants +// If C++ is 2011 or higher, add 'constexpr' qualifiers to constants +// If C++ is 2017 or higher, add 'inline' qualifiers to constants // required for C++20 module #ifndef PUGIXML_CONSTANT # if __cplusplus >= 201703 From 19a31d6fad2aa06e1a57ecda6fdc490bd3085358 Mon Sep 17 00:00:00 2001 From: Arthur LAURENT Date: Thu, 31 
Oct 2024 18:04:42 +0100 Subject: [PATCH 6/9] fix indent --- src/pugixml.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 6d483699..25fec2d1 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -160,7 +160,7 @@ # define PUGIXML_CONSTANT PUGIXML_CONSTEXPR11 # else # define PUGIXML_CONSTANT const -#endif +# endif #endif // Character interface macros From d57ccaf95308c42a3907b9e4064d733366ed6e7f Mon Sep 17 00:00:00 2001 From: Arthur LAURENT Date: Thu, 31 Oct 2024 18:05:38 +0100 Subject: [PATCH 7/9] improve some comments --- src/pugixml.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pugixml.hpp b/src/pugixml.hpp index 25fec2d1..689cb39f 100644 --- a/src/pugixml.hpp +++ b/src/pugixml.hpp @@ -152,7 +152,7 @@ // If C++ is 2011 or higher, add 'constexpr' qualifiers to constants // If C++ is 2017 or higher, add 'inline' qualifiers to constants -// required for C++20 module +// inline constexpr is required for C++20 module #ifndef PUGIXML_CONSTANT # if __cplusplus >= 201703 # define PUGIXML_CONSTANT inline PUGIXML_CONSTEXPR11 From 03e0a05f2696a6e11b845117132326ad935ef55f Mon Sep 17 00:00:00 2001 From: Arthur LAURENT Date: Thu, 31 Oct 2024 18:07:48 +0100 Subject: [PATCH 8/9] support "moduleonly" mode for C++20 module --- src/pugixml.cppm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pugixml.cppm b/src/pugixml.cppm index c314f2b2..6cdce354 100644 --- a/src/pugixml.cppm +++ b/src/pugixml.cppm @@ -67,5 +67,7 @@ import std.compat; module :private; -#define PUGIXML_SOURCE "pugixml.cpp" -#include PUGIXML_SOURCE +#if defined(PUGIXML_HEADER_ONLY) && !defined(PUGIXML_SOURCE) +# define PUGIXML_SOURCE "pugixml.cpp" +# include PUGIXML_SOURCE +#endif From 4a9169f0ee8e0edebf1ce0a74784883cd9506924 Mon Sep 17 00:00:00 2001 From: Arthur LAURENT Date: Thu, 31 Oct 2024 19:28:41 +0100 Subject: [PATCH 9/9] if PUGIXML_HEADER_ONLY is not defined put symbols in global module fragment
--- src/pugixml.cppm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pugixml.cppm b/src/pugixml.cppm index 6cdce354..3b58b032 100644 --- a/src/pugixml.cppm +++ b/src/pugixml.cppm @@ -59,7 +59,13 @@ import std.compat; # pragma clang diagnostic push # pragma clang diagnostic ignored "-Winclude-angled-in-module-purview" #endif +#if !defined(PUGIXML_HEADER_ONLY) +extern "C++" { +#endif #include <pugixml.hpp> +#if !defined(PUGIXML_HEADER_ONLY) +} +#endif #if defined(__clang__) # pragma clang diagnostic pop #endif