From 61352a3b5b580de5e69a6356cae4f488fb767233 Mon Sep 17 00:00:00 2001 From: Mohammad Nejati Date: Sun, 3 Nov 2024 16:17:56 +0000 Subject: [PATCH 1/4] Burl: reuse the established connection when possible --- example/client/burl/main.cpp | 186 +++++++++++++++++++---------------- 1 file changed, 100 insertions(+), 86 deletions(-) diff --git a/example/client/burl/main.cpp b/example/client/burl/main.cpp index d6f268d..66688c1 100644 --- a/example/client/burl/main.cpp +++ b/example/client/burl/main.cpp @@ -107,8 +107,8 @@ target(urls::url_view url) noexcept struct is_redirect_result { - bool is_redirect; - bool need_method_change; + bool is_redirect = false; + bool need_method_change = false; }; is_redirect_result @@ -119,18 +119,36 @@ is_redirect(http_proto::status status) noexcept // user agents do change the method in practice. switch(status) { - case http_proto::status::moved_permanently: - case http_proto::status::found: - case http_proto::status::see_other: - return { true, true }; - case http_proto::status::temporary_redirect: - case http_proto::status::permanent_redirect: - return { true, false }; - default: - return { false, false }; + case http_proto::status::moved_permanently: + case http_proto::status::found: + case http_proto::status::see_other: + return { true, true }; + case http_proto::status::temporary_redirect: + case http_proto::status::permanent_redirect: + return { true, false }; + default: + return { false, false }; } } +bool +can_reuse_connection( + http_proto::response_view response, + urls::url_view a, + urls::url_view b) noexcept +{ + if(a.encoded_origin() != b.encoded_origin()) + return false; + + if(response.version() != http_proto::version::http_1_1) + return false; + + if(response.metadata().connection.close) + return false; + + return true; +} + class any_stream { public: @@ -378,8 +396,7 @@ class multipart_form std::optional file_size; }; - // storage_ containts boundary with extra "--" prefix and postfix. - // This reduces the number of steps needed during serialization. + // boundary with extra "--" prefix and postfix. std::array storage_{ generate_boundary() }; std::vector parts_; @@ -554,61 +571,61 @@ class multipart_form::source { switch(step_) { - case 0: - // --boundary - if(!copy({ form_->storage_.data(), - form_->storage_.size() - 2 })) return rs; - ++step_; - case 1: - if(!copy(content_disposition_)) return rs; - ++step_; - case 2: - if(!copy(it_->name)) return rs; - ++step_; - case 3: - if(!copy("\"")) return rs; - ++step_; - case 4: - if(!it_->file_size.has_value()) - goto content_type; - if(!copy(filename_)) return rs; - ++step_; - case 5: - if(!copy(filename(it_->value_or_path))) return rs; - ++step_; - case 6: - if(!copy("\"")) return rs; - ++step_; - case 7: - content_type: - if(it_->content_type.empty()) - goto end_of_header; - if(!copy(content_type_)) return rs; - ++step_; - case 8: - if(!copy(it_->content_type)) return rs; - ++step_; - case 9: - end_of_header: - if(!copy("\r\n\r\n")) return rs; - ++step_; - case 10: - if(it_->file_size) - { - if(!read( - it_->value_or_path, - it_->file_size.value())) return rs; - } - else - { - if(!copy(it_->value_or_path)) return rs; - } - ++step_; - case 11: - if(!copy("\r\n")) - return rs; - step_ = 0; - ++it_; + case 0: + // --boundary + if(!copy({ form_->storage_.data(), + form_->storage_.size() - 2 })) return rs; + ++step_; + case 1: + if(!copy(content_disposition_)) return rs; + ++step_; + case 2: + if(!copy(it_->name)) return rs; + ++step_; + case 3: + if(!copy("\"")) return rs; + ++step_; + case 4: + if(!it_->file_size.has_value()) + goto content_type; + if(!copy(filename_)) return rs; + ++step_; + case 5: + if(!copy(filename(it_->value_or_path))) return rs; + ++step_; + case 6: + if(!copy("\"")) return rs; + ++step_; + case 7: + content_type: + if(it_->content_type.empty()) + goto end_of_header; + if(!copy(content_type_)) return rs; + ++step_; + case 8: + if(!copy(it_->content_type)) return rs; + ++step_; + case 9: + end_of_header: + if(!copy("\r\n\r\n")) return rs; + ++step_; + case 10: + if(it_->file_size) + { + if(!read( + it_->value_or_path, + it_->file_size.value())) return rs; + } + else + { + if(!copy(it_->value_or_path)) return rs; + } + ++step_; + case 11: + if(!copy("\r\n")) + return rs; + step_ = 0; + ++it_; } } @@ -687,7 +704,6 @@ class message } }; - asio::awaitable connect(ssl::context& ssl_ctx, urls::url_view url) { @@ -809,7 +825,7 @@ request( co_await http_io::async_read_header(stream, parser); // handle redirects - auto referer_url = urls::url{ url }; + auto referer = urls::url{ url }; for(;;) { auto [is_redirect, need_method_change] = @@ -822,11 +838,16 @@ request( if(auto it = response.find(http_proto::field::location); it != response.end()) { - auto redirect_url = urls::parse_uri(it->value).value(); + auto redirect = urls::parse_uri(it->value).value(); + + // Consume the body + co_await http_io::async_read(stream, parser); - // TODO: reuse the established connection when possible - co_await stream.async_shutdown(asio::as_tuple); - stream = co_await connect(ssl_ctx, redirect_url); + if(!can_reuse_connection(response, referer, redirect)) + { + co_await stream.async_shutdown(asio::as_tuple); + stream = co_await connect(ssl_ctx, redirect); + } // Change the method according to RFC 9110, Section 15.4.4. if(need_method_change && !vm.count("head")) @@ -836,11 +857,11 @@ request( request.erase(http_proto::field::content_type); msg = {}; // drop the body } - request.set_target(target(redirect_url)); - request.set(http_proto::field::host, redirect_url.host()); - request.set(http_proto::field::referer, referer_url); + request.set_target(target(redirect)); + request.set(http_proto::field::host, redirect.host()); + request.set(http_proto::field::referer, redirect); - referer_url = redirect_url; + referer = redirect; serializer.reset(); msg.start_serializer(serializer, request); @@ -891,8 +912,6 @@ request( int main(int argc, char* argv[]) { - int co_main(int argc, char* argv[]); - //return co_main(argc, argv); try { auto odesc = po::options_description{"Options"}; @@ -963,12 +982,7 @@ main(int argc, char* argv[]) auto url = urls::parse_uri(vm.at("url").as()); if(url.has_error()) - { - std::cerr - << "Failed to parse URL\n" - << "Error: " << url.error().what() << std::endl; - return EXIT_FAILURE; - } + throw system_error{ url.error(), "Failed to parse URL" }; auto ioc = asio::io_context{}; auto ssl_ctx = ssl::context{ ssl::context::tlsv12_client }; From 322da079b25858e8d8017b6dcc1f63651e14cea9 Mon Sep 17 00:00:00 2001 From: Mohammad Nejati Date: Sun, 3 Nov 2024 19:34:02 +0000 Subject: [PATCH 2/4] Burl: HTTP proxy support --- example/client/burl/main.cpp | 137 +++++++++++++++++++++++++++++------ 1 file changed, 114 insertions(+), 23 deletions(-) diff --git a/example/client/burl/main.cpp b/example/client/burl/main.cpp index 66688c1..8f568cd 100644 --- a/example/client/burl/main.cpp +++ b/example/client/burl/main.cpp @@ -105,6 +105,22 @@ target(urls::url_view url) noexcept return url.encoded_target(); } +core::string_view +effective_port(urls::url_view url) +{ + if(url.has_port()) + return url.port(); + + if(url.scheme() == "https") + return "443"; + + if(url.scheme() == "http") + return "80"; + + throw std::runtime_error{ + "Unsupported scheme" }; +} + struct is_redirect_result { bool is_redirect = false; @@ -705,31 +721,97 @@ class message }; asio::awaitable -connect(ssl::context& ssl_ctx, urls::url_view url) +connect( + const po::variables_map& vm, + ssl::context& ssl_ctx, + http_proto::context& http_proto_ctx, + urls::url_view url) { auto executor = co_await asio::this_coro::executor; auto resolver = asio::ip::tcp::resolver{ executor }; - auto service = url.has_port() ? url.port() : url.scheme(); - auto rresults = co_await resolver.async_resolve(url.host(), service); + auto stream = asio::ip::tcp::socket{ executor }; + + if(vm.count("proxy")) + { + auto proxy_url = urls::parse_uri(vm.at("proxy").as()); + + if(proxy_url.has_error()) + throw system_error{ proxy_url.error(), "Failed to parse proxy" }; + + if(proxy_url->scheme() != "http") + throw std::runtime_error{ "only HTTP proxies are supported" }; + + // Connect to the HTTP proxy server + auto rr = co_await resolver.async_resolve( + proxy_url->host(), effective_port(proxy_url.value())); + co_await asio::async_connect(stream, rr); + + { + using http_proto::field; + auto request = http_proto::request{}; + auto host = std::string{ url.encoded_host() }; + + host.push_back(':'); + host.append(effective_port(url)); + + request.set_method(http_proto::method::connect); + request.set_target(host); + request.set(field::host, host); + request.set(field::proxy_connection, "keep-alive"); + + if(vm.count("user-agent")) + { + request.set( + field::user_agent, + vm.at("user-agent").as()); + } + else + { + request.set(field::user_agent, "Boost.Http.Io"); + } + + // TODO + // request.set(field::proxy_authorization, ""); + + auto serializer = http_proto::serializer{ http_proto_ctx }; + serializer.start(request); + co_await http_io::async_write(stream, serializer); + } + + { + auto parser = http_proto::response_parser{ http_proto_ctx }; + parser.reset(); + parser.start(); + co_await http_io::async_read_header(stream, parser); + if(parser.get().status() != http_proto::status::ok) + throw std::runtime_error{ + "Proxy server rejected the connection" }; + } + } + else // no proxy + { + auto rr = co_await resolver.async_resolve( + url.host(), effective_port(url)); + co_await asio::async_connect(stream, rr); + } if(url.scheme() == "https") { - auto stream = ssl::stream{ executor, ssl_ctx }; - co_await asio::async_connect(stream.lowest_layer(), rresults); + auto ssl_stream = ssl::stream{ + std::move(stream), ssl_ctx }; - if(auto host_s = std::string{ url.host() }; - !SSL_set_tlsext_host_name(stream.native_handle(), host_s.c_str())) + auto host = std::string{ url.host() }; + if(!SSL_set_tlsext_host_name( + ssl_stream.native_handle(), host.c_str())) { throw system_error{ static_cast(::ERR_get_error()), asio::error::get_ssl_category() }; } - co_await stream.async_handshake(ssl::stream_base::client); - co_return stream; + co_await ssl_stream.async_handshake(ssl::stream_base::client); + co_return ssl_stream; } - auto stream = asio::ip::tcp::socket{ executor }; - co_await asio::async_connect(stream, rresults); co_return stream; } @@ -813,7 +895,7 @@ request( http_proto::request request, urls::url_view url) { - auto stream = co_await connect(ssl_ctx, url); + auto stream = co_await connect(vm, ssl_ctx, http_proto_ctx, url); auto parser = http_proto::response_parser{ http_proto_ctx }; auto serializer = http_proto::serializer{ http_proto_ctx }; @@ -838,15 +920,18 @@ request( if(auto it = response.find(http_proto::field::location); it != response.end()) { - auto redirect = urls::parse_uri(it->value).value(); + auto location = urls::parse_uri(it->value).value(); // Consume the body co_await http_io::async_read(stream, parser); - if(!can_reuse_connection(response, referer, redirect)) + if(!can_reuse_connection(response, referer, location)) { - co_await stream.async_shutdown(asio::as_tuple); - stream = co_await connect(ssl_ctx, redirect); + if(!vm.count("proxy")) + co_await stream.async_shutdown(asio::as_tuple); + + stream = co_await connect( + vm, ssl_ctx, http_proto_ctx, location); } // Change the method according to RFC 9110, Section 15.4.4. @@ -857,11 +942,11 @@ request( request.erase(http_proto::field::content_type); msg = {}; // drop the body } - request.set_target(target(redirect)); - request.set(http_proto::field::host, redirect.host()); - request.set(http_proto::field::referer, redirect); + request.set_target(target(location)); + request.set(http_proto::field::host, location.host()); + request.set(http_proto::field::referer, location); - referer = redirect; + referer = location; serializer.reset(); msg.start_serializer(serializer, request); @@ -904,9 +989,12 @@ request( } // clean shutdown - auto [ec] = co_await stream.async_shutdown(asio::as_tuple); - if(ec && ec != ssl::error::stream_truncated) - throw system_error{ ec }; + if(!vm.count("proxy")) + { + auto [ec] = co_await stream.async_shutdown(asio::as_tuple); + if(ec && ec != ssl::error::stream_truncated) + throw system_error{ ec }; + } }; int @@ -936,6 +1024,9 @@ main(int argc, char* argv[]) ("output,o", po::value()->value_name(""), "Write to file instead of stdout") + ("proxy,x", + po::value()->value_name(""), + "Use this proxy") ("range,r", po::value()->value_name(""), "Retrieve only the bytes within range") From 21528bd03b132ed4f1f3be8a70628989369a01ef Mon Sep 17 00:00:00 2001 From: Mohammad Nejati Date: Mon, 4 Nov 2024 12:00:54 +0000 Subject: [PATCH 3/4] Burl: SOCKS5 proxy support --- example/client/burl/main.cpp | 231 +++++++++++++++++++++++++++-------- 1 file changed, 183 insertions(+), 48 deletions(-) diff --git a/example/client/burl/main.cpp b/example/client/burl/main.cpp index 8f568cd..1890eff 100644 --- a/example/client/burl/main.cpp +++ b/example/client/burl/main.cpp @@ -720,6 +720,174 @@ class message } }; +asio::awaitable +connect_socks5_proxy( + asio::ip::tcp::socket& stream, + urls::url_view url, + urls::url_view proxy) +{ + auto executor = co_await asio::this_coro::executor; + auto resolver = asio::ip::tcp::resolver{ executor }; + auto rresults = co_await resolver.async_resolve( + proxy.host(), effective_port(proxy)); + + // Connect to the proxy server + co_await asio::async_connect(stream, rresults); + + // Greeting request + if(proxy.has_userinfo()) + { + std::uint8_t greeting_req[4] = { 0x05, 0x02, 0x00, 0x02 }; + co_await asio::async_write(stream, asio::buffer(greeting_req)); + } + else + { + std::uint8_t greeting_req[3] = { 0x05, 0x01, 0x00 }; + co_await asio::async_write(stream, asio::buffer(greeting_req)); + } + + // Greeting response + std::uint8_t greeting_resp[2]; + co_await asio::async_read(stream, asio::buffer(greeting_resp)); + + if(greeting_resp[0] != 0x05) + throw std::runtime_error{ "SOCKS5 invalid version" }; + + switch(greeting_resp[1]) + { + case 0x00: // No Authentication + break; + case 0x02: // Username/password + { + // Authentication request + auto auth_req = std::string{ 0x01 }; + + auto user = proxy.encoded_user(); + auth_req.push_back(static_cast(user.decoded_size())); + user.decode({}, urls::string_token::append_to(auth_req)); + + auto pass = proxy.encoded_password(); + auth_req.push_back(static_cast(pass.decoded_size())); + pass.decode({}, urls::string_token::append_to(auth_req)); + + co_await asio::async_write(stream, asio::buffer(auth_req)); + + // Authentication response + std::uint8_t greeting_resp[2]; + co_await asio::async_read(stream, asio::buffer(greeting_resp)); + + if(greeting_resp[1] != 0x00) + throw std::runtime_error{ + "SOCKS5 authentication failed" }; + break; + } + default: + throw std::runtime_error{ + "SOCKS5 no acceptable authentication method" + }; + } + + // Connection request + auto conn_req = std::string{ 0x05, 0x01, 0x00, 0x03 }; + auto host = url.encoded_host(); + conn_req.push_back(static_cast(host.decoded_size())); + host.decode({}, urls::string_token::append_to(conn_req)); + + std::uint16_t port = std::stoi(effective_port(url)); + conn_req.push_back(static_cast((port >> 8) & 0xFF)); + conn_req.push_back(static_cast(port & 0xFF)); + + co_await asio::async_write(stream, asio::buffer(conn_req)); + + // Connection response + std::uint8_t conn_resp_head[5]; + co_await asio::async_read(stream, asio::buffer(conn_resp_head)); + + if(conn_resp_head[1] != 0x00) + throw std::runtime_error{ + "SOCKS5 connection request failed" }; + + std::string conn_resp_tail; + conn_resp_tail.resize( + [&]() + { + // subtract 1 because we have pre-read one byte + switch(conn_resp_head[3]) + { + case 0x01: + return 4 + 2 - 1; // ipv4 + port + case 0x03: + return conn_resp_head[4] + 2 - 1; // domain name + port + case 0x04: + return 16 + 2 - 1; // ipv6 + port + default: + throw std::runtime_error{ + "SOCKS5 invalid address type" }; + } + }()); + co_await asio::async_read(stream, asio::buffer(conn_resp_tail)); +} + +asio::awaitable +connect_http_proxy( + const po::variables_map& vm, + http_proto::context& http_proto_ctx, + asio::ip::tcp::socket& stream, + urls::url_view url, + urls::url_view proxy) +{ + auto executor = co_await asio::this_coro::executor; + auto resolver = asio::ip::tcp::resolver{ executor }; + auto rresults = co_await resolver.async_resolve( + proxy.host(), effective_port(proxy)); + + // Connect to the proxy server + co_await asio::async_connect(stream, rresults); + + using http_proto::field; + auto request = http_proto::request{}; + auto host_port = [&]() + { + auto rs = url.encoded_host().decode(); + rs.push_back(':'); + rs.append(effective_port(url)); + return rs; + }(); + + request.set_method(http_proto::method::connect); + request.set_target(host_port); + request.set(field::host, host_port); + request.set(field::proxy_connection, "keep-alive"); + + if(vm.count("user-agent")) + { + request.set( + field::user_agent, + vm.at("user-agent").as()); + } + else + { + request.set(field::user_agent, "Boost.Http.Io"); + } + + // TODO + // request.set(field::proxy_authorization, ""); + + auto serializer = http_proto::serializer{ http_proto_ctx }; + auto parser = http_proto::response_parser{ http_proto_ctx }; + + serializer.start(request); + co_await http_io::async_write(stream, serializer); + + parser.reset(); + parser.start(); + co_await http_io::async_read_header(stream, parser); + + if(parser.get().status() != http_proto::status::ok) + throw std::runtime_error{ + "Proxy server rejected the connection" }; +} + asio::awaitable connect( const po::variables_map& vm, @@ -728,7 +896,6 @@ connect( urls::url_view url) { auto executor = co_await asio::this_coro::executor; - auto resolver = asio::ip::tcp::resolver{ executor }; auto stream = asio::ip::tcp::socket{ executor }; if(vm.count("proxy")) @@ -738,63 +905,31 @@ connect( if(proxy_url.has_error()) throw system_error{ proxy_url.error(), "Failed to parse proxy" }; - if(proxy_url->scheme() != "http") - throw std::runtime_error{ "only HTTP proxies are supported" }; - - // Connect to the HTTP proxy server - auto rr = co_await resolver.async_resolve( - proxy_url->host(), effective_port(proxy_url.value())); - co_await asio::async_connect(stream, rr); - + if(proxy_url->scheme() == "http") { - using http_proto::field; - auto request = http_proto::request{}; - auto host = std::string{ url.encoded_host() }; - - host.push_back(':'); - host.append(effective_port(url)); - - request.set_method(http_proto::method::connect); - request.set_target(host); - request.set(field::host, host); - request.set(field::proxy_connection, "keep-alive"); - - if(vm.count("user-agent")) - { - request.set( - field::user_agent, - vm.at("user-agent").as()); - } - else - { - request.set(field::user_agent, "Boost.Http.Io"); - } - - // TODO - // request.set(field::proxy_authorization, ""); - - auto serializer = http_proto::serializer{ http_proto_ctx }; - serializer.start(request); - co_await http_io::async_write(stream, serializer); + co_await connect_http_proxy( + vm, http_proto_ctx, stream, url, proxy_url.value()); } - + else if(proxy_url->scheme() == "socks5") { - auto parser = http_proto::response_parser{ http_proto_ctx }; - parser.reset(); - parser.start(); - co_await http_io::async_read_header(stream, parser); - if(parser.get().status() != http_proto::status::ok) - throw std::runtime_error{ - "Proxy server rejected the connection" }; + co_await connect_socks5_proxy( + stream, url, proxy_url.value()); + } + else + { + throw std::runtime_error{ + "only HTTP and SOCKS5 proxies are supported" }; } } else // no proxy { - auto rr = co_await resolver.async_resolve( + auto resolver = asio::ip::tcp::resolver{ executor }; + auto rresults = co_await resolver.async_resolve( url.host(), effective_port(url)); - co_await asio::async_connect(stream, rr); + co_await asio::async_connect(stream, rresults); } + // TLS handshake if(url.scheme() == "https") { auto ssl_stream = ssl::stream{ From bf3f1912c109ccf91b27c14aa5aeac1d71b5acd8 Mon Sep 17 00:00:00 2001 From: Mohammad Nejati Date: Mon, 4 Nov 2024 15:25:29 +0000 Subject: [PATCH 4/4] Burl: HTTP basic access authentication support --- example/client/burl/main.cpp | 58 +++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/example/client/burl/main.cpp b/example/client/burl/main.cpp index 1890eff..4863802 100644 --- a/example/client/burl/main.cpp +++ b/example/client/burl/main.cpp @@ -165,6 +165,49 @@ can_reuse_connection( return true; } +void +base64_encode(std::string& dest, core::string_view src) +{ + // Adapted from Boost.Beast project + char const* in = static_cast(src.data()); + static char constexpr tab[] = { + "ABCDEFGHIJKLMNOP" + "QRSTUVWXYZabcdef" + "ghijklmnopqrstuv" + "wxyz0123456789+/" + }; + + for(auto n = src.size() / 3; n--;) + { + dest.append({ + tab[(in[0] & 0xfc) >> 2], + tab[((in[0] & 0x03) << 4) + ((in[1] & 0xf0) >> 4)], + tab[((in[2] & 0xc0) >> 6) + ((in[1] & 0x0f) << 2)], + tab[in[2] & 0x3f] }); + in += 3; + } + + switch(src.size() % 3) + { + case 2: + dest.append({ + tab[ (in[0] & 0xfc) >> 2], + tab[((in[0] & 0x03) << 4) + ((in[1] & 0xf0) >> 4)], + tab[ (in[1] & 0x0f) << 2], + '=' }); + break; + case 1: + dest.append({ + tab[ (in[0] & 0xfc) >> 2], + tab[((in[0] & 0x03) << 4)], + '=', + '=' }); + break; + case 0: + break; + } +} + class any_stream { public: @@ -870,8 +913,13 @@ connect_http_proxy( request.set(field::user_agent, "Boost.Http.Io"); } - // TODO - // request.set(field::proxy_authorization, ""); + if(proxy.has_userinfo()) + { + auto credentials = proxy.encoded_userinfo().decode(); + auto basic_auth = std::string{ "Basic " }; + base64_encode(basic_auth, credentials); + request.set(field::proxy_authorization, basic_auth); + } auto serializer = http_proto::serializer{ http_proto_ctx }; auto parser = http_proto::response_parser{ http_proto_ctx }; @@ -1000,8 +1048,10 @@ create_request( if(vm.count("user")) { - // TODO: use base64 encoding for basic authentication - request.set(field::authorization, vm.at("user").as()); + auto credentials = vm.at("user").as(); + auto basic_auth = std::string{ "Basic " }; + base64_encode(basic_auth, credentials); + request.set(field::authorization, basic_auth); } if(vm.count("compressed") && http_proto_has_zlib)