Skip to content

Commit 4846eb6

Browse files
authoredFeb 4, 2025··
Merge pull request #73 from JuliaComputing/mp/dataset-names-period
feat: allow periods in dataset names
2 parents f8074f3 + 004b35b commit 4846eb6

File tree

5 files changed

+123
-18
lines changed

5 files changed

+123
-18
lines changed
 

‎Project.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "DataSets"
22
uuid = "c9661210-8a83-48f0-b833-72e62abce419"
33
authors = ["Chris Foster <chris42f@gmail.com> and contributors"]
4-
version = "0.2.11"
4+
version = "0.2.12"
55

66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"

‎src/DataSets.jl

+27-5
Original file line numberDiff line numberDiff line change
@@ -87,27 +87,49 @@ end
8787
Check whether a dataset name is valid.
8888
8989
Valid names must start with a letter or a number, the rest of the name can also contain `-`
90-
and `_` characters. The names can also be hieracicial, with segments separated by forward
91-
slashes (`/`). Each segment must also start with either a letter or a number. For example:
90+
and `_` characters. The names can also be hierarchical, with segments separated by forward
91+
slashes (`/`) or periods (`.`). Each segment must also start with either a letter or a number.
92+
93+
For example, the following dataset names are valid:
9294
9395
my_data
9496
my_data_1
9597
username/data
9698
organization_name/project-name/data
9799
123user/456dataset--name
100+
username/my_table.csv
101+
dataset/v0.1.2
102+
103+
whereas names like these are invalid:
104+
105+
__mydata__
106+
username/.git
107+
my...dataset
108+
109+
!!! note "Segment separators"
110+
111+
In dataset names, both `/` and `.` are considered segment separators from a syntax
112+
perspective. While DataSets.jl does not impose any specific interpretation on the
113+
dataset name, it is recommended to use `/` to separate segments from a semantic
114+
perspective, and to interpret each forward-slash-separated segment as a path component.
115+
Periods would conventionally be used to separate file extensions within a segment.
116+
117+
E.g. use `username/my-project-data/population.csv`, rather than
118+
`username.my-project-data.population.csv` or something like that.
98119
"""
99120
function check_dataset_name(name::AbstractString)
100121
if !occursin(DATASET_NAME_REGEX, name)
101-
error("DataSet name \"$name\" is invalid. DataSet names must start with a letter and can contain only letters, numbers, `-`, `_` or `/`.")
122+
error("DataSet name \"$name\" is invalid. DataSet names must start with a letter or a number, and can contain only letters, numbers, `-` and `_`, or `/` and `.` as segment separators.")
102123
end
103124
end
104125
# DataSet names disallow most punctuation for now, as it may be needed as
105126
# delimiters in data-related syntax (eg, for the data REPL).
106127
const DATASET_NAME_REGEX_STRING = raw"""
107128
[[:alnum:]]
108129
(?:
109-
[-[:alnum:]_] |
110-
/ (?=[[:alnum:]])
130+
[-[:alnum:]_] |
131+
\.(?=[[:alnum:]]) |
132+
\/ (?=[[:alnum:]])
111133
)*
112134
"""
113135
const DATASET_NAME_REGEX = Regex("^\n$(DATASET_NAME_REGEX_STRING)\n\$", "x")

‎test/runtests.jl

+46-12
Original file line numberDiff line numberDiff line change
@@ -98,21 +98,55 @@ end
9898
end
9999

100100
#-------------------------------------------------------------------------------
101+
function load_list(filename)
102+
lines = eachline(joinpath(@__DIR__, filename))
103+
filter(!isempty, strip.(lines))
104+
end
101105
@testset "Data set name parsing" begin
102-
@testset "Valid name: $name" for name in (
103-
"a_b", "a-b", "a1", "δεδομένα", "a/b", "a/b/c", "a-", "b_",
104-
"1", "a/1", "123", "12ab/34cd", "1/2/3", "1-2-3", "x_-__", "a---",
105-
)
106-
@test DataSets.check_dataset_name(name) === nothing
107-
@test DataSets._split_dataspec(name) == (name, nothing, nothing)
106+
@testset "Valid names" begin
107+
valid_names = load_list("testnames-valid.txt")
108+
@test !isempty(valid_names)
109+
@testset "Valid name: $name" for name in valid_names
110+
@test DataSets.check_dataset_name(name) === nothing
111+
@test DataSets._split_dataspec(name) == (name, nothing, nothing)
112+
# Also test that the name is still valid when it appears as part of
113+
# a path element.
114+
let path_name = "foo/$(name)"
115+
@test DataSets.check_dataset_name(path_name) === nothing
116+
@test DataSets._split_dataspec(path_name) == (path_name, nothing, nothing)
117+
end
118+
let path_name = "$(name)/foo"
119+
@test DataSets.check_dataset_name(path_name) === nothing
120+
@test DataSets._split_dataspec(path_name) == (path_name, nothing, nothing)
121+
end
122+
let path_name = "foo/$(name)/bar"
123+
@test DataSets.check_dataset_name(path_name) === nothing
124+
@test DataSets._split_dataspec(path_name) == (path_name, nothing, nothing)
125+
end
126+
end
108127
end
109128

110-
@testset "Invalid name: $name" for name in (
111-
"a b", "a.b", "a/b/", "a//b", "/a/b", "a/-", "a/ _/b",
112-
"a/-a", "a/-1",
113-
)
114-
@test_throws ErrorException DataSets.check_dataset_name(name)
115-
@test DataSets._split_dataspec(name) == (nothing, nothing, nothing)
129+
@testset "Invalid names" begin
130+
invalid_names = load_list("testnames-invalid.txt")
131+
@test !isempty(invalid_names)
132+
@testset "Invalid name: $name" for name in invalid_names
133+
@test_throws ErrorException DataSets.check_dataset_name(name)
134+
@test DataSets._split_dataspec(name) == (nothing, nothing, nothing)
135+
# Also test that the name is still invalid when it appears as part of
136+
# a path element.
137+
let path_name = "foo/$(name)"
138+
@test_throws ErrorException DataSets.check_dataset_name(path_name) === nothing
139+
@test DataSets._split_dataspec(path_name) == (nothing, nothing, nothing)
140+
end
141+
let path_name = "$(name)/foo"
142+
@test_throws ErrorException DataSets.check_dataset_name(path_name) === nothing
143+
@test DataSets._split_dataspec(path_name) == (nothing, nothing, nothing)
144+
end
145+
let path_name = "foo/$(name)/bar"
146+
@test_throws ErrorException DataSets.check_dataset_name(path_name) === nothing
147+
@test DataSets._split_dataspec(path_name) == (nothing, nothing, nothing)
148+
end
149+
end
116150
end
117151
end
118152

‎test/testnames-invalid.txt

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
a b
2+
a/b/
3+
a//b
4+
/a/b
5+
a/-
6+
a/ _/b
7+
a/-a
8+
a/-1
9+
.a
10+
..a
11+
a.
12+
a..
13+
.a.
14+
a..b
15+
.abc
16+
abc.
17+
abc/.def
18+
abc/def.
19+
a./b
20+
a.-
21+
_._
22+
a._b
23+
a.-b
24+
./a
25+
b/../a

‎test/testnames-valid.txt

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
a_b
2+
a-b
3+
a1
4+
δεδομένα
5+
a/b
6+
a/b/c
7+
a-
8+
b_
9+
1
10+
a/1
11+
123
12+
12ab/34cd
13+
1/2/3
14+
1-2-3
15+
x_-__
16+
a---
17+
a.b
18+
a.b
19+
abc.def
20+
abc/def.ghi
21+
abc-def.ghi_jkl
22+
a.b.c
23+
a_.c
24+
foo__-.csv

0 commit comments

Comments
 (0)
Please sign in to comment.