| @@ -81,8 +81,9 @@ void migrate_repodb_3(nsql::database& db) { | |||
| CREATE TABLE dds_cat_remotes ( | |||
| remote_id INTEGER PRIMARY KEY AUTOINCREMENT, | |||
| name TEXT NOT NULL UNIQUE, | |||
| gen_ident TEXT NOT NULL, | |||
| remote_url TEXT NOT NULL | |||
| remote_url TEXT NOT NULL, | |||
| db_etag TEXT, | |||
| db_mtime TEXT | |||
| ); | |||
| CREATE TABLE dds_cat_pkgs_new ( | |||
| @@ -48,7 +48,7 @@ void http_remote_listing::pull_source(path_ref dest) const { | |||
| fs::create_directory(dl_path.parent_path()); | |||
| http_pool pool; | |||
| auto [client, resp] = pool.request_with_redirects("GET", url); | |||
| auto [client, resp] = pool.request(url); | |||
| auto dl_file = neo::file_stream::open(dl_path, neo::open_mode::write); | |||
| client.recv_body_into(resp, neo::stream_io_buffers{dl_file}); | |||
| @@ -27,12 +27,9 @@ struct remote_db { | |||
| temporary_dir _tempdir; | |||
| nsql::database db; | |||
| static remote_db download_and_open(neo::url const& url) { | |||
| http_pool pool; | |||
| auto [client, resp] = pool.request_with_redirects("GET", url); | |||
| auto tempdir = temporary_dir::create(); | |||
| auto repo_db_dl = tempdir.path() / "repo.db"; | |||
| static remote_db download_and_open(http_client& client, const http_response_info& resp) { | |||
| auto tempdir = temporary_dir::create(); | |||
| auto repo_db_dl = tempdir.path() / "repo.db"; | |||
| fs::create_directories(tempdir.path()); | |||
| auto outfile = neo::file_stream::open(repo_db_dl, neo::open_mode::write); | |||
| client.recv_body_into(resp, neo::stream_io_buffers(outfile)); | |||
| @@ -40,16 +37,6 @@ struct remote_db { | |||
| auto db = nsql::open(repo_db_dl.string()); | |||
| return {tempdir, std::move(db)}; | |||
| } | |||
| static remote_db download_and_open_for_base(neo::url url) { | |||
| auto repo_url = url; | |||
| repo_url.path = fs::path(url.path).append("repo.db").generic_string(); | |||
| return download_and_open(repo_url); | |||
| } | |||
| static remote_db download_and_open_for_base(std::string_view url_str) { | |||
| return download_and_open_for_base(neo::url::parse(url_str)); | |||
| } | |||
| }; | |||
| } // namespace | |||
| @@ -58,7 +45,15 @@ pkg_remote pkg_remote::connect(std::string_view url_str) { | |||
| DDS_E_SCOPE(e_url_string{std::string(url_str)}); | |||
| const auto url = neo::url::parse(url_str); | |||
| auto db = remote_db::download_and_open_for_base(url); | |||
| auto& pool = http_pool::global_pool(); | |||
| auto db_url = url; | |||
| while (db_url.path.ends_with("/")) | |||
| db_url.path.pop_back(); | |||
| auto full_path = fmt::format("{}/{}", db_url.path, "repo.db"); | |||
| db_url.path = full_path; | |||
| auto [client, resp] = pool.request(db_url, http_request_params{.method = "GET"}); | |||
| auto db = remote_db::download_and_open(client, resp); | |||
| auto name_st = db.db.prepare("SELECT name FROM dds_repo_meta"); | |||
| auto [name] = nsql::unpack_single<std::string>(name_st); | |||
| @@ -67,18 +62,39 @@ pkg_remote pkg_remote::connect(std::string_view url_str) { | |||
| void pkg_remote::store(nsql::database_ref db) { | |||
| auto st = db.prepare(R"( | |||
| INSERT INTO dds_cat_remotes (name, gen_ident, remote_url) | |||
| VALUES (?, ?, ?) | |||
| INSERT INTO dds_cat_remotes (name, remote_url) | |||
| VALUES (?, ?) | |||
| ON CONFLICT (name) DO | |||
| UPDATE SET gen_ident = ?2, remote_url = ?3 | |||
| UPDATE SET remote_url = ?2 | |||
| )"); | |||
| nsql::exec(st, _name, "[placeholder]", _base_url.to_string()); | |||
| nsql::exec(st, _name, _base_url.to_string()); | |||
| } | |||
| void pkg_remote::update_pkg_db(nsql::database_ref db) { | |||
| void pkg_remote::update_pkg_db(nsql::database_ref db, | |||
| std::optional<std::string_view> etag, | |||
| std::optional<std::string_view> db_mtime) { | |||
| dds_log(info, "Pulling repository contents for {} [{}]", _name, _base_url.to_string()); | |||
| auto rdb = remote_db::download_and_open_for_base(_base_url); | |||
| auto& pool = http_pool::global_pool(); | |||
| auto url = _base_url; | |||
| while (url.path.ends_with("/")) | |||
| url.path.pop_back(); | |||
| auto full_path = fmt::format("{}/{}", url.path, "repo.db"); | |||
| url.path = full_path; | |||
| auto [client, resp] = pool.request(url, | |||
| http_request_params{ | |||
| .method = "GET", | |||
| .prior_etag = etag.value_or(""), | |||
| .last_modified = db_mtime.value_or(""), | |||
| }); | |||
| if (resp.not_modified()) { | |||
| // Cache hit | |||
| dds_log(info, "Package database {} is up-to-date", _name); | |||
| client.discard_body(resp); | |||
| return; | |||
| } | |||
| auto rdb = remote_db::download_and_open(client, resp); | |||
| auto base_url_str = _base_url.to_string(); | |||
| while (base_url_str.ends_with("/")) { | |||
| @@ -140,7 +156,7 @@ void pkg_remote::update_pkg_db(nsql::database_ref db) { | |||
| )"); | |||
| // Validate our database | |||
| dds_log(trace, "Running integrity check"); | |||
| auto fk_check = db.prepare("PRAGMA foreign_key_check"); | |||
| auto fk_check = db.prepare("PRAGMA foreign_key_check"); | |||
| auto rows = nsql::iter_tuples<std::string, std::int64_t, std::string, std::string>(fk_check); | |||
| bool any_failed = false; | |||
| for (auto [child_table, rowid, parent_table, failed_idx] : rows) { | |||
| @@ -165,17 +181,33 @@ void pkg_remote::update_pkg_db(nsql::database_ref db) { | |||
| throw_external_error<errc::corrupted_catalog_db>( | |||
| "Database update failed due to data integrity errors"); | |||
| } | |||
| // Save the cache info for the remote | |||
| if (auto new_etag = resp.etag()) { | |||
| nsql::exec(db.prepare("UPDATE dds_cat_remotes SET db_etag = ? WHERE name = ?"), | |||
| *new_etag, | |||
| _name); | |||
| } | |||
| if (auto mtime = resp.last_modified()) { | |||
| nsql::exec(db.prepare("UPDATE dds_cat_remotes SET db_mtime = ? WHERE name = ?"), | |||
| *mtime, | |||
| _name); | |||
| } | |||
| } | |||
| void dds::update_all_remotes(nsql::database_ref db) { | |||
| dds_log(info, "Updating catalog from all remotes"); | |||
| auto repos_st = db.prepare("SELECT name, remote_url FROM dds_cat_remotes"); | |||
| auto tups = nsql::iter_tuples<std::string, std::string>(repos_st) | ranges::to_vector; | |||
| for (const auto& [name, remote_url] : tups) { | |||
| auto repos_st = db.prepare("SELECT name, remote_url, db_etag, db_mtime FROM dds_cat_remotes"); | |||
| auto tups = nsql::iter_tuples<std::string, | |||
| std::string, | |||
| std::optional<std::string>, | |||
| std::optional<std::string>>(repos_st) | |||
| | ranges::to_vector; | |||
| for (const auto& [name, remote_url, etag, db_mtime] : tups) { | |||
| DDS_E_SCOPE(e_url_string{remote_url}); | |||
| pkg_remote repo{name, neo::url::parse(remote_url)}; | |||
| repo.update_pkg_db(db); | |||
| repo.update_pkg_db(db, etag, db_mtime); | |||
| } | |||
| dds_log(info, "Recompacting database..."); | |||
| @@ -20,7 +20,9 @@ public: | |||
| static pkg_remote connect(std::string_view url); | |||
| void store(neo::sqlite3::database_ref); | |||
| void update_pkg_db(neo::sqlite3::database_ref); | |||
| void update_pkg_db(neo::sqlite3::database_ref, | |||
| std::optional<std::string_view> etag = {}, | |||
| std::optional<std::string_view> last_modified = {}); | |||
| }; | |||
| void update_all_remotes(neo::sqlite3::database_ref); | |||
| @@ -1,6 +1,7 @@ | |||
| #include "./pool.hpp" | |||
| #include <dds/error/errors.hpp> | |||
| #include <dds/util/log.hpp> | |||
| #include <dds/util/result.hpp> | |||
| #include <boost/leaf/exception.hpp> | |||
| @@ -31,6 +32,8 @@ struct http_client_impl { | |||
| _state_t _state = _state_t::ready; | |||
| bool _peer_disconnected = false; | |||
| neo::socket _conn; | |||
| std::string _host_string; | |||
| @@ -94,24 +97,34 @@ struct http_client_impl { | |||
| .parse_tail = {}, | |||
| }; | |||
| auto content_len_str = std::to_string(params.content_length); | |||
| auto hostname_port = fmt::format("{}:{}", origin.hostname, origin.port); | |||
| dds_log(trace, | |||
| " --> HTTP {} {}://{}:{}{}", | |||
| origin.protocol, | |||
| params.method, | |||
| origin.hostname, | |||
| origin.port, | |||
| params.path); | |||
| auto hostname_port = fmt::format("{}:{}", origin.hostname, origin.port); | |||
| std::pair<std::string_view, std::string_view> headers[] = { | |||
| std::vector<std::pair<std::string_view, std::string_view>> headers = { | |||
| {"Host", hostname_port}, | |||
| {"Accept", "*/*"}, | |||
| {"Content-Length", content_len_str}, | |||
| {"Content-Length", "0"}, | |||
| {"TE", "gzip, chunked, plain"}, | |||
| {"Connection", "keep-alive"}, | |||
| }; | |||
| if (!params.prior_etag.empty()) { | |||
| headers.push_back({"If-None-Match", params.prior_etag}); | |||
| } | |||
| if (!params.last_modified.empty()) { | |||
| headers.push_back({"If-Modified-Since", params.last_modified}); | |||
| } | |||
| _do_io([&](auto&& sink) { | |||
| neo::http::write_request(sink, start_line, headers, neo::const_buffer()); | |||
| }); | |||
| _state = _state_t::sent_req_head; | |||
| if (params.content_length == 0) { | |||
| _state = _state_t::sent_req_body; | |||
| } | |||
| _state = _state_t::sent_req_body; | |||
| } | |||
| http_response_info recv_head() { | |||
| @@ -130,6 +143,18 @@ struct http_client_impl { | |||
| if (clen_hdr && clen_hdr->value == "0") { | |||
| _state = _state_t::ready; | |||
| } | |||
| bool disconnect = false; | |||
| if (r.version == neo::http::version::v1_0) { | |||
| dds_log(trace, "HTTP/1.0 server will disconnect by default"); | |||
| disconnect = true; | |||
| } else if (r.version == neo::http::version::v1_1) { | |||
| disconnect = r.header_value("Connection") == "close"; | |||
| } else { | |||
| // Invalid version?? | |||
| disconnect = true; | |||
| } | |||
| _peer_disconnected = disconnect; | |||
| dds_log(trace, " <-- HTTP {} {}", r.status, r.status_message); | |||
| return r; | |||
| } | |||
| }; | |||
| @@ -149,6 +174,8 @@ struct http_pool_impl { | |||
| using namespace dds; | |||
| using client_impl_ptr = std::shared_ptr<detail::http_client_impl>; | |||
| http_pool::~http_pool() = default; | |||
| http_pool::http_pool() | |||
| @@ -156,8 +183,23 @@ http_pool::http_pool() | |||
| http_client::~http_client() { | |||
| // When the http_client is dropped, return its impl back to the connection pool for this origin | |||
| auto pool = _pool.lock(); | |||
| if (pool && _impl) { | |||
| if (!_impl) { | |||
| // We are moved-from | |||
| return; | |||
| } | |||
| neo_assert(expects, | |||
| _impl->_state == detail::http_client_impl::_state_t::ready, | |||
| "An http_client object was dropped while in a partial-request state. Did you read " | |||
| "the response header AND body?", | |||
| int(_impl->_state), | |||
| _impl->origin.protocol, | |||
| _impl->origin.hostname, | |||
| _impl->origin.port); | |||
| if (_impl->_peer_disconnected) { | |||
| // Do not return this connection to the pool. Let it destroy | |||
| return; | |||
| } | |||
| if (auto pool = _pool.lock()) { | |||
| pool->_clients.emplace(_impl->origin, _impl); | |||
| } | |||
| } | |||
| @@ -210,24 +252,40 @@ template <typename Stream> | |||
| struct recv_chunked_state : erased_message_body { | |||
| Stream& _strm; | |||
| neo::http::chunked_buffers<Stream&> _chunked{_strm}; | |||
| client_impl_ptr _client; | |||
| explicit recv_chunked_state(Stream& s) | |||
| : _strm(s) {} | |||
| explicit recv_chunked_state(Stream& s, client_impl_ptr c) | |||
| : _strm(s) | |||
| , _client(c) {} | |||
| neo::const_buffer next(std::size_t n) override { return _chunked.next(n); } | |||
| void consume(std::size_t n) override { _chunked.consume(n); } | |||
| neo::const_buffer next(std::size_t n) override { | |||
| auto part = _chunked.next(n); | |||
| if (neo::buffer_is_empty(part)) { | |||
| _client->_state = detail::http_client_impl::_state_t::ready; | |||
| } | |||
| return part; | |||
| } | |||
| void consume(std::size_t n) override { _chunked.consume(n); } | |||
| }; | |||
| template <typename Stream> | |||
| struct recv_gzip_state : erased_message_body { | |||
| Stream& _strm; | |||
| neo::gzip_source<Stream&> _gzip{_strm}; | |||
| client_impl_ptr _client; | |||
| explicit recv_gzip_state(Stream& s) | |||
| : _strm(s) {} | |||
| explicit recv_gzip_state(Stream& s, client_impl_ptr c) | |||
| : _strm(s) | |||
| , _client(c) {} | |||
| neo::const_buffer next(std::size_t n) override { return _gzip.next(n); } | |||
| void consume(std::size_t n) override { _gzip.consume(n); } | |||
| neo::const_buffer next(std::size_t n) override { | |||
| auto part = _gzip.next(n); | |||
| if (neo::buffer_is_empty(part)) { | |||
| _client->_state = detail::http_client_impl::_state_t::ready; | |||
| } | |||
| return part; | |||
| } | |||
| void consume(std::size_t n) override { _gzip.consume(n); } | |||
| }; | |||
| template <typename Stream> | |||
| @@ -236,12 +294,19 @@ struct recv_plain_state : erased_message_body { | |||
| std::size_t _size; | |||
| client_impl_ptr _client; | |||
| explicit recv_plain_state(Stream& s, std::size_t size) | |||
| explicit recv_plain_state(Stream& s, std::size_t size, client_impl_ptr cl) | |||
| : _strm(s) | |||
| , _size(size) {} | |||
| , _size(size) | |||
| , _client(cl) {} | |||
| neo::const_buffer next(std::size_t n) override { return _strm.next((std::min)(n, _size)); } | |||
| void consume(std::size_t n) override { | |||
| neo::const_buffer next(std::size_t n) override { | |||
| auto part = _strm.next((std::min)(n, _size)); | |||
| if (neo::buffer_is_empty(part)) { | |||
| _client->_state = detail::http_client_impl::_state_t::ready; | |||
| } | |||
| return part; | |||
| } | |||
| void consume(std::size_t n) override { | |||
| _size -= n; | |||
| return _strm.consume(n); | |||
| } | |||
| @@ -264,13 +329,20 @@ std::unique_ptr<erased_message_body> http_client::_make_body_reader(const http_r | |||
| return _impl->_do_io([&](auto&& source) -> std::unique_ptr<erased_message_body> { | |||
| using source_type = decltype(source); | |||
| if (res.content_length() == 0) { | |||
| dds_log(trace, "Empty response body"); | |||
| _set_ready(); | |||
| return std::make_unique<recv_none_state>(); | |||
| } else if (res.transfer_encoding() == "chunked") { | |||
| return std::make_unique<recv_chunked_state<source_type>>(source); | |||
| dds_log(trace, "Chunked response body"); | |||
| return std::make_unique<recv_chunked_state<source_type>>(source, _impl); | |||
| } else if (res.transfer_encoding() == "gzip") { | |||
| return std::make_unique<recv_gzip_state<source_type>>(source); | |||
| dds_log(trace, "GZip encoded response body"); | |||
| return std::make_unique<recv_gzip_state<source_type>>(source, _impl); | |||
| } else if (!res.transfer_encoding().has_value() && res.content_length() > 0) { | |||
| return std::make_unique<recv_plain_state<source_type>>(source, *res.content_length()); | |||
| dds_log(trace, "Plain response body"); | |||
| return std::make_unique<recv_plain_state<source_type>>(source, | |||
| *res.content_length(), | |||
| _impl); | |||
| } else { | |||
| neo_assert(invariant, | |||
| false, | |||
| @@ -297,23 +369,30 @@ void http_client::_set_ready() noexcept { | |||
| _impl->_state = detail::http_client_impl::_state_t::ready; | |||
| } | |||
| std::pair<http_client, http_response_info> | |||
| http_pool::request_with_redirects(std::string_view method, const neo::url& url_) { | |||
| auto url = url_; | |||
| request_result http_pool::request(neo::url url, http_request_params params) { | |||
| DDS_E_SCOPE(url); | |||
| for (auto i = 0; i <= 100; ++i) { | |||
| params.path = url.path; | |||
| params.query = url.query.value_or(""); | |||
| auto origin = network_origin::for_url(url); | |||
| auto client = client_for_origin(origin); | |||
| http_request_params params{ | |||
| .method = method, | |||
| .path = url.path, | |||
| .query = url.query.value_or(""), | |||
| }; | |||
| client.send_head(params); | |||
| auto resp = client.recv_head(); | |||
| DDS_E_SCOPE(resp); | |||
| if (dds::log::level_enabled(dds::log::level::trace)) { | |||
| for (auto hdr : resp.headers) { | |||
| dds_log(trace, " -- {}: {}", hdr.key, hdr.value); | |||
| } | |||
| } | |||
| if (resp.not_modified()) { | |||
| // Not Modified, a cache hit | |||
| return {std::move(client), std::move(resp)}; | |||
| } | |||
| if (resp.is_error()) { | |||
| client.discard_body(resp); | |||
| throw boost::leaf::exception(http_status_error("Received an error from HTTP")); | |||
| @@ -99,6 +99,13 @@ public: | |||
| void discard_body(const http_response_info&); | |||
| }; | |||
| struct request_result { | |||
| http_client client; | |||
| http_response_info resp; | |||
| void discard_body() { client.discard_body(resp); } | |||
| }; | |||
| class http_pool { | |||
| friend class http_client; | |||
| std::shared_ptr<detail::http_pool_impl> _impl; | |||
| @@ -109,37 +116,20 @@ public: | |||
| http_pool& operator=(http_pool&&) = default; | |||
| ~http_pool(); | |||
| http_client client_for_origin(const network_origin&); | |||
| http_response_info request(neo::url_view url) { return request(url, neo::mutable_buffer()); } | |||
| template <neo::buffer_output Output> | |||
| http_response_info request(neo::url_view url, Output&& out) { | |||
| return request(url, neo::const_buffer(), out); | |||
| static http_pool& thread_local_pool() { | |||
| thread_local http_pool inst; | |||
| return inst; | |||
| } | |||
| template <neo::buffer_input In, neo::buffer_output Out> | |||
| http_response_info request(neo::url_view url, In&& in, Out&& out) { | |||
| auto origin = network_origin::for_url(url); | |||
| auto size = neo::buffer_size(in); | |||
| auto client = client_for_origin(origin); | |||
| client.send_head(http_request_params{ | |||
| .method = "GET", | |||
| .path = url.path.empty() ? "/" : url.path, | |||
| .query = url.query.value_or(""), | |||
| .content_length = size, | |||
| }); | |||
| client.send_body(in); | |||
| auto resp = client.recv_head(); | |||
| client.recv_body_into(resp, out); | |||
| return resp; | |||
| static http_pool& global_pool() { | |||
| static http_pool inst; | |||
| return inst; | |||
| } | |||
| std::pair<http_client, http_response_info> | |||
| request_with_redirects(http_client& cl, const http_request_params& params); | |||
| http_client client_for_origin(const network_origin&); | |||
| std::pair<http_client, http_response_info> request_with_redirects(std::string_view method, | |||
| const neo::url& url); | |||
| request_result request(neo::url url, http_request_params params); | |||
| auto request(neo::url url) { return request(url, http_request_params{}); } | |||
| }; | |||
| } // namespace dds | |||
| @@ -12,16 +12,14 @@ TEST_CASE("Connect to a remote") { | |||
| // auto client = pool.access(); | |||
| auto cl = pool.client_for_origin({"https", "www.google.com", 443}); | |||
| cl.send_head({.method = "GET", .path = "/"}); | |||
| // cl.send_head({.method = "GET", .path = "/"}); | |||
| auto resp = cl.recv_head(); | |||
| CHECK(resp.status == 200); | |||
| CHECK(resp.status_message == "OK"); | |||
| cl.discard_body(resp); | |||
| } | |||
| TEST_CASE("Issue a request on a pool") { | |||
| dds::http_pool pool; | |||
| neo::string_dynbuf_io body; | |||
| auto resp = pool.request(neo::url_view::split("https://www.google.com"), body); | |||
| CHECK(resp.status == 200); | |||
| CHECK(body.read_area_view().size() > 5); | |||
| dds::http_pool pool; | |||
| auto resp = pool.request(neo::url::parse("https://www.google.com")); | |||
| resp.discard_body(); | |||
| } | |||
| @@ -7,11 +7,14 @@ | |||
| namespace dds { | |||
| struct http_request_params { | |||
| std::string_view method; | |||
| std::string_view path; | |||
| std::string_view query = ""; | |||
| std::size_t content_length = 0; | |||
| neo::http::headers headers{}; | |||
| std::string_view method = "GET"; | |||
| std::string_view path{}; | |||
| std::string_view query{}; | |||
| bool follow_redirects = true; | |||
| std::string_view prior_etag{}; | |||
| std::string_view last_modified{}; | |||
| }; | |||
| } // namespace dds | |||
| @@ -21,12 +21,15 @@ struct http_response_info { | |||
| bool is_server_error() const noexcept { return status >= 500 && status < 600; } | |||
| bool is_error() const noexcept { return is_client_error() || is_server_error(); } | |||
| bool is_redirect() const noexcept { return status >= 300 && status < 400; } | |||
| bool not_modified() const noexcept { return status == 304; } | |||
| std::optional<std::string_view> header_value(std::string_view key) const noexcept; | |||
| std::optional<int> content_length() const noexcept; | |||
| auto location() const noexcept { return header_value("Location"); } | |||
| auto transfer_encoding() const noexcept { return header_value("Transfer-Encoding"); } | |||
| auto etag() const noexcept { return header_value("ETag"); } | |||
| auto last_modified() const noexcept { return header_value("Last-Modified"); } | |||
| }; | |||
| } // namespace dds | |||
| @@ -17,6 +17,12 @@ void dds::notify_cancel() noexcept { got_signal = SIGINT; } | |||
| void dds::install_signal_handlers() noexcept { | |||
| std::signal(SIGINT, handle_signal); | |||
| std::signal(SIGTERM, handle_signal); | |||
| #ifdef SIGPIPE | |||
| // XXX: neo-io doesn't behave nicely when EOF is hit on sockets. This Isn't | |||
| // easily fixed portably without simply blocking SIGPIPE globally. | |||
| std::signal(SIGPIPE, SIG_IGN); | |||
| #endif | |||
| } | |||
| bool dds::is_cancelled() noexcept { return got_signal != 0; } | |||
| @@ -24,4 +30,4 @@ void dds::cancellation_point() { | |||
| if (is_cancelled()) { | |||
| throw user_cancelled(); | |||
| } | |||
| } | |||
| } | |||