Skip to content

Commit 6c06e3a

Browse files
authored
Merge pull request #58 from JuliaComputing/backports-0.2.8
Backports for 0.2.8
2 parents 09efe3d + e8f153d commit 6c06e3a

File tree

3 files changed

+53
-34
lines changed

3 files changed

+53
-34
lines changed

Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "DataSets"
22
uuid = "c9661210-8a83-48f0-b833-72e62abce419"
33
authors = ["Chris Foster <chris42f@gmail.com> and contributors"]
4-
version = "0.2.7"
4+
version = "0.2.8"
55

66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"

src/DataSets.jl

+24-19
Original file line numberDiff line numberDiff line change
@@ -94,21 +94,20 @@ separated with forward slashes. Examples:
9494
organization-dataset_name/project/data
9595
"""
9696
function check_dataset_name(name::AbstractString)
97-
# DataSet names disallow most punctuation for now, as it may be needed as
98-
# delimiters in data-related syntax (eg, for the data REPL).
99-
dataset_name_pattern = r"
100-
^
101-
[[:alpha:]]
102-
(?:
103-
[-[:alnum:]_] |
104-
/ (?=[[:alpha:]])
105-
)*
106-
$
107-
"x
108-
if !occursin(dataset_name_pattern, name)
97+
if !occursin(DATASET_NAME_REGEX, name)
10998
error("DataSet name \"$name\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.")
11099
end
111100
end
101+
# DataSet names disallow most punctuation for now, as it may be needed as
102+
# delimiters in data-related syntax (eg, for the data REPL).
103+
const DATASET_NAME_REGEX_STRING = raw"""
104+
[[:alpha:]]
105+
(?:
106+
[-[:alnum:]_] |
107+
/ (?=[[:alpha:]])
108+
)*
109+
"""
110+
# Anchored, extended-mode ("x") regex matching a complete dataset name.
# `\z` is used rather than `$` because PCRE's `$` also matches just before a
# trailing newline, which would let names such as "abc\n" pass validation.
const DATASET_NAME_REGEX = Regex("^\n$(DATASET_NAME_REGEX_STRING)\n\\z", "x")
112111

113112
# Hacky thing until we figure out which fields DataSet should actually have.
114113
function Base.getproperty(d::DataSet, name::Symbol)
@@ -254,16 +253,22 @@ function _unescapeuri(str)
254253
return String(take!(out))
255254
end
256255

256+
# Parse as a suffix of URI syntax
257+
# name/of/dataset?param1=value1&param2=value2#fragment
258+
const DATASET_SPEC_REGEX = Regex(
259+
"""
260+
^
261+
($(DATASET_NAME_REGEX_STRING))
262+
(?:\\?([^#]*))? # query - a=b&c=d
263+
(?:\\#(.*))? # fragment - ...
264+
\$
265+
""",
266+
"x",
267+
)
257268
function _split_dataspec(spec::AbstractString)
258269
# Parse as a suffix of URI syntax
259270
# name/of/dataset?param1=value1&param2=value2#fragment
260-
m = match(r"
261-
^
262-
((?:[[:alpha:]][[:alnum:]_]*/?)+) # name - a/b/c
263-
(?:\?([^#]*))? # query - a=b&c=d
264-
(?:\#(.*))? # fragment - ...
265-
$"x,
266-
spec)
271+
m = match(DATASET_SPEC_REGEX, spec)
267272
if isnothing(m)
268273
return nothing, nothing, nothing
269274
end

test/runtests.jl

+28-14
Original file line numberDiff line numberDiff line change
@@ -92,23 +92,37 @@ end
9292

9393
#-------------------------------------------------------------------------------
9494
@testset "Data set name parsing" begin
95-
# Valid names
96-
@test DataSets.check_dataset_name("a_b") === nothing
97-
@test DataSets.check_dataset_name("a1") === nothing
98-
@test DataSets.check_dataset_name("δεδομένα") === nothing
99-
@test DataSets.check_dataset_name("a/b") === nothing
100-
@test DataSets.check_dataset_name("a/b/c") === nothing
101-
@test DataSets.check_dataset_name("a-b-c-") === nothing
102-
# Invalid names
103-
@test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.") DataSets.check_dataset_name("a?b")
104-
@test_throws ErrorException DataSets.check_dataset_name("1")
105-
@test_throws ErrorException DataSets.check_dataset_name("a b")
106-
@test_throws ErrorException DataSets.check_dataset_name("a.b")
107-
@test_throws ErrorException DataSets.check_dataset_name("a/b/")
108-
@test_throws ErrorException DataSets.check_dataset_name("/a/b")
95+
@testset "Valid name: $name" for name in (
96+
"a_b", "a-b", "a1", "δεδομένα", "a/b", "a/b/c", "a-", "b_",
97+
)
98+
@test DataSets.check_dataset_name(name) === nothing
99+
@test DataSets._split_dataspec(name) == (name, nothing, nothing)
100+
end
101+
102+
@testset "Invalid name: $name" for name in (
103+
"1", "a b", "a.b", "a/b/", "a//b", "/a/b", "a/-", "a/1", "a/ _/b"
104+
)
105+
@test_throws ErrorException DataSets.check_dataset_name(name)
106+
@test DataSets._split_dataspec(name) == (nothing, nothing, nothing)
107+
end
109108
end
110109

111110
@testset "URL-like dataspec parsing" begin
111+
# Valid dataspecs
112+
@test DataSets._split_dataspec("foo?x=1#f") == ("foo", ["x" => "1"], "f")
113+
@test DataSets._split_dataspec("foo#f") == ("foo", nothing, "f")
114+
@test DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing)
115+
@test DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing)
116+
# Invalid dataspecs
117+
@test DataSets._split_dataspec("foo ?x=1") == (nothing, nothing, nothing)
118+
@test DataSets._split_dataspec("foo\n?x=1") == (nothing, nothing, nothing)
119+
@test DataSets._split_dataspec("foo\nbar?x=1") == (nothing, nothing, nothing)
120+
@test DataSets._split_dataspec(" foo?x=1") == (nothing, nothing, nothing)
121+
@test DataSets._split_dataspec("1?x=1") == (nothing, nothing, nothing)
122+
@test DataSets._split_dataspec("foo-?x=1") == (nothing, nothing, nothing)
123+
@test DataSets._split_dataspec("foo #f") == (nothing, nothing, nothing)
124+
@test DataSets._split_dataspec("@?x=1") == (nothing, nothing, nothing)
125+
112126
proj = DataSets.load_project("Data.toml")
113127

114128
@test !haskey(dataset(proj, "a_text_file"), "dataspec")

0 commit comments

Comments
 (0)