Merge pull request #58 from JuliaComputing/backports-0.2.8

mortenpi · web-flow · commit 6c06e3a93fa7 · 2022-11-29T22:43:52.000+13:00
Backports for 0.2.8
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "DataSets"
 uuid = "c9661210-8a83-48f0-b833-72e62abce419"
 authors = ["Chris Foster <chris42f@gmail.com> and contributors"]
-version = "0.2.7"
+version = "0.2.8"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
diff --git a/src/DataSets.jl b/src/DataSets.jl
@@ -94,21 +94,20 @@ separated with forward slashes. Examples:
     organization-dataset_name/project/data
 """
 function check_dataset_name(name::AbstractString)
-    # DataSet names disallow most punctuation for now, as it may be needed as
-    # delimiters in data-related syntax (eg, for the data REPL).
-    dataset_name_pattern = r"
-        ^
-        [[:alpha:]]
-        (?:
-            [-[:alnum:]_]     |
-            / (?=[[:alpha:]])
-        )*
-        $
-        "x
-    if !occursin(dataset_name_pattern, name)
+    if !occursin(DATASET_NAME_REGEX, name)
         error("DataSet name \"$name\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.")
     end
 end
+# DataSet names disallow most punctuation for now, as it may be needed as
+# delimiters in data-related syntax (eg, for the data REPL).
+const DATASET_NAME_REGEX_STRING = raw"""
+[[:alpha:]]
+(?:
+    [-[:alnum:]_]     |
+    / (?=[[:alpha:]])
+)*
+"""
+const DATASET_NAME_REGEX = Regex("^\n$(DATASET_NAME_REGEX_STRING)\n\\z", "x")  # \z, unlike $, rejects a trailing newline
 
 # Hacky thing until we figure out which fields DataSet should actually have.
 function Base.getproperty(d::DataSet, name::Symbol)
@@ -254,16 +253,22 @@ function _unescapeuri(str)
     return String(take!(out))
 end
 
+# Parse as a suffix of URI syntax
+# name/of/dataset?param1=value1&param2=value2#fragment
+const DATASET_SPEC_REGEX = Regex(
+    """
+    ^
+    ($(DATASET_NAME_REGEX_STRING))
+    (?:\\?([^#]*))? # query    - a=b&c=d
+    (?:\\#(.*))?    # fragment - ...
+    \\z             # absolute end of input (a dollar anchor would also match before a final newline)
+    """,
+    "x",
+)
 function _split_dataspec(spec::AbstractString)
     # Parse as a suffix of URI syntax
     # name/of/dataset?param1=value1&param2=value2#fragment
-    m = match(r"
-        ^
-        ((?:[[:alpha:]][[:alnum:]_]*/?)+)  # name     - a/b/c
-        (?:\?([^#]*))?                     # query    - a=b&c=d
-        (?:\#(.*))?                        # fragment - ...
-        $"x,
-        spec)
+    m = match(DATASET_SPEC_REGEX, spec)
     if isnothing(m)
         return nothing, nothing, nothing
     end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -92,23 +92,37 @@ end
 
 #-------------------------------------------------------------------------------
 @testset "Data set name parsing" begin
-    # Valid names
-    @test DataSets.check_dataset_name("a_b") === nothing
-    @test DataSets.check_dataset_name("a1") === nothing
-    @test DataSets.check_dataset_name("δεδομένα") === nothing
-    @test DataSets.check_dataset_name("a/b") === nothing
-    @test DataSets.check_dataset_name("a/b/c") === nothing
-    @test DataSets.check_dataset_name("a-b-c-") === nothing
-    # Invalid names
-    @test_throws ErrorException("DataSet name \"a?b\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.") DataSets.check_dataset_name("a?b")
-    @test_throws ErrorException DataSets.check_dataset_name("1")
-    @test_throws ErrorException DataSets.check_dataset_name("a b")
-    @test_throws ErrorException DataSets.check_dataset_name("a.b")
-    @test_throws ErrorException DataSets.check_dataset_name("a/b/")
-    @test_throws ErrorException DataSets.check_dataset_name("/a/b")
+    @testset "Valid name: $name" for name in (
+        "a_b", "a-b", "a1", "δεδομένα", "a/b", "a/b/c", "a-", "b_",
+    )
+        @test DataSets.check_dataset_name(name) === nothing
+        @test DataSets._split_dataspec(name) == (name, nothing, nothing)
+    end
+
+    @testset "Invalid name: $name" for name in (
+        "1", "a b", "a.b", "a/b/", "a//b", "/a/b", "a/-", "a/1", "a/ _/b"
+    )
+        @test_throws ErrorException DataSets.check_dataset_name(name)
+        @test DataSets._split_dataspec(name) == (nothing, nothing, nothing)
+    end
 end
 
 @testset "URL-like dataspec parsing" begin
+    # Valid dataspecs
+    @test DataSets._split_dataspec("foo?x=1#f") == ("foo", ["x" => "1"], "f")
+    @test DataSets._split_dataspec("foo#f") == ("foo", nothing, "f")
+    @test DataSets._split_dataspec("foo?x=1") == ("foo", ["x" => "1"], nothing)
+    @test DataSets._split_dataspec("foo-?x=1") == ("foo-", ["x" => "1"], nothing)  # trailing `-` is a valid name character
+    # Invalid dataspecs
+    @test DataSets._split_dataspec("foo ?x=1") == (nothing, nothing, nothing)
+    @test DataSets._split_dataspec("foo\n?x=1") == (nothing, nothing, nothing)
+    @test DataSets._split_dataspec("foo\nbar?x=1") == (nothing, nothing, nothing)
+    @test DataSets._split_dataspec(" foo?x=1") == (nothing, nothing, nothing)
+    @test DataSets._split_dataspec("1?x=1") == (nothing, nothing, nothing)
+    @test DataSets._split_dataspec("foo/?x=1") == (nothing, nothing, nothing)  # `/` must be followed by a letter
+    @test DataSets._split_dataspec("foo #f") == (nothing, nothing, nothing)
+    @test DataSets._split_dataspec("@?x=1") == (nothing, nothing, nothing)
+
     proj = DataSets.load_project("Data.toml")
 
     @test !haskey(dataset(proj, "a_text_file"), "dataspec")