Skip to content

Commit f2dd72f

Browse files
authored
Added function that converts Punycode encoded domains to Unicode (std::string) (#64)
* Moved some more files around * Added tests to convert from ascii encoded domains to unicode * Refactored tests * Renamed domain_to_unicode
1 parent 352dd31 commit f2dd72f

31 files changed

+230
-178
lines changed

include/skyr/core/url_record.hpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <string>
1111
#include <cstdint>
1212
#include <optional>
13+
#include <skyr/core/url_schemes.hpp>
1314

1415
namespace skyr {
1516
inline namespace v1 {
@@ -65,22 +66,42 @@ class url_record {
6566
/// Tests if the URL is a special scheme
6667
/// \returns `true` if the URL scheme is a special scheme, `false`
6768
/// otherwise
68-
[[nodiscard]] auto is_special() const noexcept -> bool;
69+
[[nodiscard]] auto is_special() const noexcept -> bool {
70+
return skyr::is_special(scheme);
71+
}
6972

7073
/// Tests if the URL includes credentials
7174
/// \returns `true` if the URL username or password is not an
7275
/// empty string, `false` otherwise
73-
[[nodiscard]] auto includes_credentials() const noexcept -> bool;
76+
[[nodiscard]] auto includes_credentials() const noexcept -> bool {
77+
return !username.empty() || !password.empty();
78+
}
7479

7580
/// Tests if the URL cannot have a username, password or port
7681
/// \returns `true` if the URL cannot have a username, password
7782
/// or port
78-
[[nodiscard]] auto cannot_have_a_username_password_or_port() const noexcept -> bool;
83+
[[nodiscard]] auto cannot_have_a_username_password_or_port() const noexcept -> bool {
84+
return
85+
(!host || host.value().empty()) ||
86+
cannot_be_a_base_url ||
87+
(scheme == "file");
88+
}
7989

8090
/// Swaps two `url_record` objects
8191
/// \param other Another `url_record` object
82-
void swap(url_record &other) noexcept;
83-
92+
void swap(url_record &other) noexcept {
93+
using std::swap;
94+
swap(scheme, other.scheme);
95+
swap(username, other.username);
96+
swap(password, other.password);
97+
swap(host, other.host);
98+
swap(port, other.port);
99+
swap(path, other.path);
100+
swap(query, other.query);
101+
swap(fragment, other.fragment);
102+
swap(cannot_be_a_base_url, other.cannot_be_a_base_url);
103+
swap(validation_error, other.validation_error);
104+
}
84105
};
85106

86107
/// Swaps two `url_record` objects
Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,53 +3,65 @@
33
// (See accompanying file LICENSE_1_0.txt or copy at
44
// http://www.boost.org/LICENSE_1_0.txt)
55

6+
#ifndef SKYR_URL_SCHEMES_INC
7+
#define SKYR_URL_SCHEMES_INC
8+
9+
#include <vector>
10+
#include <utility>
11+
#include <string>
12+
#include <string_view>
13+
#include <cstdint>
14+
#include <optional>
615
#include <algorithm>
7-
#include "url_schemes.hpp"
816

917
namespace skyr {
1018
inline namespace v1 {
1119
namespace details {
12-
auto special_schemes() noexcept -> const default_port_list & {
20+
using default_port_list = std::vector<std::pair<std::string, std::optional<std::uint16_t>>>;
21+
22+
inline auto special_schemes() noexcept -> const default_port_list & {
1323
static const auto schemes = default_port_list{
14-
{"ftp", 21},
15-
{"file", std::nullopt},
16-
{"http", 80},
17-
{"https", 443},
18-
{"ws", 80},
19-
{"wss", 443},
20-
};
24+
{"ftp", 21},
25+
{"file", std::nullopt},
26+
{"http", 80},
27+
{"https", 443},
28+
{"ws", 80},
29+
{"wss", 443},
30+
};
2131
return schemes;
2232
}
33+
} // namespace details
2334

24-
auto default_port(std::string_view scheme) noexcept -> std::optional<std::uint16_t> {
25-
auto schemes = special_schemes();
35+
/// \param scheme The URL scheme to test (compared exactly against the special schemes table)
36+
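/// \returns `true` if `scheme` is one of the special schemes (`ftp`, `file`, `http`, `https`, `ws`, `wss`), `false` otherwise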
37+
inline auto is_special(std::string_view scheme) noexcept -> bool {
38+
const auto &schemes = details::special_schemes();
2639
auto first = begin(schemes), last = end(schemes);
2740
auto it = std::find_if(
2841
first, last,
2942
[&scheme](const auto &special_scheme) -> bool {
3043
return scheme == special_scheme.first;
3144
});
32-
if (it != last) {
33-
return it->second;
34-
}
35-
return std::nullopt;
45+
return (it != last);
3646
}
3747

38-
auto is_special(std::string_view scheme) noexcept -> bool {
39-
auto schemes = special_schemes();
48+
/// \param scheme The URL scheme whose default port is looked up in the special schemes table
49+
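/// \returns The default port for `scheme`, or `std::nullopt` if `scheme` is not a special scheme or has no default port (e.g. `file`)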
50+
inline auto default_port(std::string_view scheme) noexcept -> std::optional<std::uint16_t> {
51+
const auto &schemes = details::special_schemes();
4052
auto first = begin(schemes), last = end(schemes);
4153
auto it = std::find_if(
4254
first, last,
4355
[&scheme](const auto &special_scheme) -> bool {
4456
return scheme == special_scheme.first;
4557
});
46-
return (it != last);
47-
}
48-
49-
auto is_default_port(std::string_view scheme, std::uint16_t port) noexcept -> bool {
50-
auto dport = default_port(scheme);
51-
return dport && (dport.value() == port);
58+
if (it != last) {
59+
return it->second;
60+
}
61+
return std::nullopt;
5262
}
53-
} // namespace details
5463
} // namespace v1
5564
} // namespace skyr
65+
66+
67+
#endif // SKYR_URL_SCHEMES_INC

include/skyr/domain/domain.hpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,25 +15,31 @@
1515

1616
namespace skyr {
1717
inline namespace v1 {
18-
/// Converts a UTF-8 encoded domain to ASCII using
18+
/// Converts a UTF-32 encoded domain to ASCII using
1919
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
2020
///
2121
/// \param domain A domain
2222
/// \param be_strict Tells the processor to be strict
2323
/// \returns An ASCII domain, or an error
2424
auto domain_to_ascii(
25-
std::string_view domain,
26-
bool be_strict = false) -> tl::expected<std::string, std::error_code>;
25+
std::u32string_view domain,
26+
bool be_strict=false) -> tl::expected<std::string, std::error_code>;
2727

28-
/// Converts a UTF-32 encoded domain to ASCII using
28+
/// Converts a UTF-8 encoded domain to ASCII using
2929
/// [IDNA processing](https://www.unicode.org/reports/tr46/#Processing)
3030
///
3131
/// \param domain A domain
3232
/// \param be_strict Tells the processor to be strict
3333
/// \returns An ASCII domain, or an error
3434
auto domain_to_ascii(
35-
std::u32string_view domain,
36-
bool be_strict = false) -> tl::expected<std::string, std::error_code>;
35+
std::string_view domain,
36+
bool be_strict=false) -> tl::expected<std::string, std::error_code>;
37+
38+
/// Converts a Punycode encoded domain to UTF-8
39+
///
40+
/// \param ascii A Punycode encoded domain
41+
/// \returns A valid UTF-8 encoded domain, or an error
42+
auto domain_to_unicode(std::string_view ascii) -> tl::expected<std::string, std::error_code>;
3743
} // namespace v1
3844
} // namespace skyr
3945

include/skyr/network/ipv4_address.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,12 @@ class ipv4_address {
4747
/// Constructor
4848
/// \param address Sets the IPv4 address to `address`
4949
explicit ipv4_address(unsigned int address)
50-
: address_(details::to_network_byte_order(address)) {}
50+
: address_(to_network_byte_order(address)) {}
5151

5252
/// The address value
5353
/// \returns The address value
5454
[[nodiscard]] auto address() const noexcept {
55-
return details::from_network_byte_order(address_);
55+
return from_network_byte_order(address_);
5656
}
5757

5858
/// The address in bytes in network byte order

include/skyr/network/ipv6_address.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class ipv6_address {
5252
/// \param address Sets the IPv6 address to `address`
5353
explicit ipv6_address(std::array<unsigned short, 8> address) {
5454
for (auto i = 0UL; i < address.size(); ++i) {
55-
address_[i] = details::to_network_byte_order(address[i]);
55+
address_[i] = to_network_byte_order(address[i]);
5656
}
5757
}
5858

include/skyr/platform/endianness.hpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@
1111

1212
namespace skyr {
1313
inline namespace v1 {
14-
namespace details {
1514
inline auto is_big_endian() noexcept {
1615
const auto word = 0x0001;
1716
auto bytes = static_cast<const unsigned char *>(static_cast<const void *>(&word));
1817
return bytes[0] != 0x01;
1918
}
2019

20+
namespace details {
2121
template <typename intT>
2222
inline auto swap_endianness(
2323
intT v, typename std::enable_if<std::is_integral<intT>::value>::type * = nullptr) noexcept -> intT {
@@ -28,15 +28,17 @@ inline auto swap_endianness(
2828
}
2929
return *static_cast<const intT *>(static_cast<const void *>(bytes.data()));
3030
}
31+
} // namespace details
3132

32-
inline auto to_network_byte_order(unsigned int v) noexcept {
33-
return (is_big_endian()) ? v : swap_endianness(v);
33+
template <class intT>
34+
inline auto to_network_byte_order(intT v) noexcept {
35+
return (is_big_endian()) ? v : details::swap_endianness(v);
3436
}
3537

36-
inline auto from_network_byte_order(unsigned int v) noexcept {
37-
return (is_big_endian()) ? v : swap_endianness(v);
38+
template <class intT>
39+
inline auto from_network_byte_order(intT v) noexcept {
40+
return (is_big_endian()) ? v : details::swap_endianness(v);
3841
}
39-
} // namespace details
4042
} // namespace v1
4143
} // namespace skyr
4244

src/CMakeLists.txt

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,36 @@ configure_file(
99
)
1010

1111
set(skyr_SRCS
12+
core/url_parser_context.hpp
13+
core/url_parser_context.cpp
14+
core/url_parse.cpp
15+
core/url_parse_impl.hpp
16+
core/url_serialize.cpp
17+
core/url_error.cpp
1218
unicode/errors.cpp
1319
domain/errors.cpp
1420
domain/domain.cpp
1521
domain/punycode.cpp
1622
domain/idna.cpp
17-
core/url_parser_context.hpp
18-
core/url_parser_context.cpp
19-
core/url_record.cpp
23+
percent_encoding/errors.cpp
2024
network/ipv4_address.cpp
2125
network/ipv6_address.cpp
2226
string/ascii.hpp
2327
string/join.hpp
2428
string/locale.hpp
2529
string/split.hpp
2630
string/starts_with.hpp
27-
core/url_parse.cpp
28-
core/url_parse_impl.hpp
29-
core/url_serialize.cpp
30-
core/url_schemes.hpp
31-
core/url_schemes.cpp
3231
url/url.cpp
33-
core/url_error.cpp
3432
url/url_search_parameters.cpp
3533
filesystem/path.cpp
36-
percent_encoding/errors.cpp
3734

3835
${PROJECT_SOURCE_DIR}/include/skyr/config.hpp
3936
${PROJECT_SOURCE_DIR}/include/skyr/version.hpp
37+
${PROJECT_SOURCE_DIR}/include/skyr/core/url_record.hpp
38+
${PROJECT_SOURCE_DIR}/include/skyr/core/url_parse.hpp
39+
${PROJECT_SOURCE_DIR}/include/skyr/core/url_serialize.hpp
40+
${PROJECT_SOURCE_DIR}/include/skyr/core/url_schemes.hpp
41+
${PROJECT_SOURCE_DIR}/include/skyr/core/url_error.hpp
4042
${PROJECT_SOURCE_DIR}/include/skyr/traits/string_traits.hpp
4143
${PROJECT_SOURCE_DIR}/include/skyr/unicode/errors.hpp
4244
${PROJECT_SOURCE_DIR}/include/skyr/unicode/core.hpp
@@ -49,6 +51,7 @@ set(skyr_SRCS
4951
${PROJECT_SOURCE_DIR}/include/skyr/unicode/ranges/transforms/byte_transform.hpp
5052
${PROJECT_SOURCE_DIR}/include/skyr/unicode/ranges/transforms/u16_transform.hpp
5153
${PROJECT_SOURCE_DIR}/include/skyr/unicode/ranges/transforms/u32_transform.hpp
54+
${PROJECT_SOURCE_DIR}/include/skyr/unicode/details/to_bytes.hpp
5255
${PROJECT_SOURCE_DIR}/include/skyr/domain/errors.hpp
5356
${PROJECT_SOURCE_DIR}/include/skyr/domain/idna.hpp
5457
${PROJECT_SOURCE_DIR}/include/skyr/domain/punycode.hpp
@@ -61,12 +64,7 @@ set(skyr_SRCS
6164
${PROJECT_SOURCE_DIR}/include/skyr/percent_encoding/percent_encode.hpp
6265
${PROJECT_SOURCE_DIR}/include/skyr/network/ipv4_address.hpp
6366
${PROJECT_SOURCE_DIR}/include/skyr/network/ipv6_address.hpp
64-
${PROJECT_SOURCE_DIR}/include/skyr/core/url_record.hpp
65-
${PROJECT_SOURCE_DIR}/include/skyr/core/url_parse.hpp
66-
${PROJECT_SOURCE_DIR}/include/skyr/core/url_serialize.hpp
67-
${PROJECT_SOURCE_DIR}/include/skyr/core/url_error.hpp
6867
${PROJECT_SOURCE_DIR}/include/skyr/query/query_iterator.hpp
69-
${PROJECT_SOURCE_DIR}/include/skyr/unicode/details/to_bytes.hpp
7068
${PROJECT_SOURCE_DIR}/include/skyr/platform/endianness.hpp
7169
${PROJECT_SOURCE_DIR}/include/skyr/url/url_record.hpp
7270
${PROJECT_SOURCE_DIR}/include/skyr/url/url_parse.hpp

src/core/url_parser_context.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
#include <skyr/domain/domain.hpp>
1515
#include <skyr/percent_encoding/percent_decode_range.hpp>
1616
#include "url_parser_context.hpp"
17-
#include "url_schemes.hpp"
17+
#include "skyr/core/url_schemes.hpp"
1818
#include "string/starts_with.hpp"
1919
#include "string/locale.hpp"
2020

@@ -288,11 +288,11 @@ auto url_parser_context::parse_scheme(char byte) -> tl::expected<url_parse_actio
288288
buffer.push_back(lower);
289289
} else if (byte == ':') {
290290
if (state_override) {
291-
if (url.is_special() && !details::is_special(buffer)) {
291+
if (url.is_special() && !is_special(buffer)) {
292292
return tl::make_unexpected(url_parse_errc::cannot_override_scheme);
293293
}
294294

295-
if (!url.is_special() && details::is_special(buffer)) {
295+
if (!url.is_special() && is_special(buffer)) {
296296
return tl::make_unexpected(url_parse_errc::cannot_override_scheme);
297297
}
298298

@@ -308,7 +308,7 @@ auto url_parser_context::parse_scheme(char byte) -> tl::expected<url_parse_actio
308308
url.scheme = buffer;
309309

310310
if (state_override) {
311-
if (url.port == details::default_port(url.scheme)) {
311+
if (url.port == default_port(url.scheme)) {
312312
url.port = std::nullopt;
313313
}
314314
return url_parse_action::success;
@@ -611,7 +611,8 @@ auto url_parser_context::parse_port(char byte) -> tl::expected<url_parse_action,
611611
return tl::make_unexpected(port.error());
612612
}
613613

614-
if (details::is_default_port(url.scheme, port.value())) {
614+
auto dport = default_port(url.scheme);
615+
if (dport && (dport.value() == port.value())) {
615616
url.port = std::nullopt;
616617
}
617618
else {

src/core/url_record.cpp

Lines changed: 0 additions & 40 deletions
This file was deleted.

0 commit comments

Comments
 (0)