# filesystem.jl
#
# Storage Driver implementation for trees which are rooted in the file system
# (in git terminology, there exists a "working copy")
#
# Supertype for tree roots which live in the local filesystem. The concrete
# roots in this file (`FileSystemRoot`, `TempFilesystemRoot`) each provide
# `sys_abspath(root)` and `iswriteable(root)`, which the generic methods below
# are written against.
abstract type AbstractFileSystemRoot end
# The functions sys_abspath and sys_joinpath generate/join OS-specific
# _local filesystem paths_ out of logical paths. They should be defined only
# for trees which are rooted in the actual filesystem.
# Resolve the logical `path` relative to `root` into a local filesystem path.
# An empty relative path refers to the root itself, so the root's own path is
# returned unchanged in that case.
function sys_abspath(root::AbstractFileSystemRoot, path::RelPath)
    base = sys_abspath(root)
    if isempty(path.components)
        return base
    end
    return joinpath(base, sys_joinpath(path))
end
# Join the components of a logical relative path into an OS-specific relative
# path string. A path with no components maps to the empty string.
function sys_joinpath(path::RelPath)
    parts = path.components
    return isempty(parts) ? "" : joinpath(parts...)
end

# Objects which carry a `root` and a `path` resolve through the two-argument
# `sys_abspath(root, path)` method.
function sys_abspath(path::AbsPath)
    return sys_abspath(path.root, path.path)
end

function sys_abspath(tree::BlobTree)
    return sys_abspath(tree.root, tree.path)
end

function sys_abspath(file::Blob)
    return sys_abspath(file.root, file.path)
end
#--------------------------------------------------
# Storage data interface for trees
#
# TODO: Formalize this interface!
## 1. Query
# TODO: would it be better to express the following dispatch in terms of
# AbsPath{<:AbstractFileSystemRoot} rather than using double dispatch?
# Each query below resolves the logical path to a local filesystem path with
# `sys_abspath` and then defers to the corresponding Base function.
function Base.isdir(root::AbstractFileSystemRoot, path::RelPath)
    return isdir(sys_abspath(root, path))
end

function Base.isfile(root::AbstractFileSystemRoot, path::RelPath)
    return isfile(sys_abspath(root, path))
end

function Base.ispath(root::AbstractFileSystemRoot, path::RelPath)
    return ispath(sys_abspath(root, path))
end

# A root is summarized by the local filesystem path it points at.
function Base.summary(io::IO, root::AbstractFileSystemRoot)
    return print(io, sys_abspath(root))
end

function Base.readdir(root::AbstractFileSystemRoot, path::RelPath)
    return readdir(sys_abspath(root, path))
end
## 2. Mutation
#
# TODO: Likely requires rework!
# Create the directory at logical `path` under `root` and return it as a
# `BlobTree`.
#
# Keyword arguments are forwarded to `Base.mkdir` for the resolved filesystem
# path. Throws an error if `root` is not writeable.
function Base.mkdir(root::AbstractFileSystemRoot, path::RelPath; kws...)
    if !iswriteable(root)
        # Report the root we were actually given (the original referred to an
        # undefined variable here, turning this error into an UndefVarError).
        error("Cannot make directory in read-only tree root at $(sys_abspath(root))")
    end
    # Forward the keyword arguments this method accepts; there are no
    # positional varargs in the signature to splat.
    mkdir(sys_abspath(root, path); kws...)
    return BlobTree(root, path)
end
# Remove whatever lives at logical `path` under `root`. Keyword arguments are
# passed straight through to `Base.rm` on the resolved filesystem path.
function Base.rm(root::AbstractFileSystemRoot, path::RelPath; kws...)
    target = sys_abspath(root, path)
    return rm(target; kws...)
end
#--------------------------------------------------
# Storage data interface for Blob
# TODO: Make this the generic implementation for AbstractDataStorage
# Callback form of `open`: opens `path` under `root` as `as_type` (forwarding
# `kws`) and passes the result to `f`, returning whatever `f` returns.
# NOTE(review): `@context` and `@!` are macros defined outside this file;
# presumably they scope the opened resource so that it is released once `f`
# has returned -- confirm against their definition.
function Base.open(f::Function, as_type::Type{IO},
root::AbstractFileSystemRoot, path; kws...)
@context f(@! open(as_type, root, path; kws...))
end
# Open the file at `path` under `root` as an `IO` stream.
#
# `write` defaults to false and `read` defaults to `!write`; these and any
# remaining keywords are forwarded to `open` on the resolved filesystem path.
# Throws an error when writing is requested but `root` is not writeable.
# NOTE(review): the `@!` macro is defined outside this file; presumably it
# ties the returned stream to a caller-provided resource context -- confirm.
@! function Base.open(::Type{IO}, root::AbstractFileSystemRoot, path;
write=false, read=!write, kws...)
# Refuse to open for writing underneath a read-only root.
if !iswriteable(root) && write
error("Error writing file at read-only path $path")
end
@! open(sys_abspath(root, path); read=read, write=write, kws...)
end
# Read the content at logical `path` under `root`, decoded as type `T`.
function Base.read(root::AbstractFileSystemRoot, path::RelPath, ::Type{T}) where {T}
    syspath = sys_abspath(root, path)
    return read(syspath, T)
end

# Read the content at logical `path` under `root` using the default of
# `Base.read` for a file path.
function Base.read(root::AbstractFileSystemRoot, path::RelPath)
    syspath = sys_abspath(root, path)
    return read(syspath)
end
#--------------------------------------------------
"""
## Metadata spec
For Blob:
```
[datasets.storage]
driver="FileSystem"
type="Blob"
path=\$(path_to_file)
```
For BlobTree:
```
[datasets.storage]
driver="FileSystem"
type="BlobTree"
path=\$(path_to_directory)
```
"""
struct FileSystemRoot <: AbstractFileSystemRoot
# Location of the root in the local filesystem (the keyword constructor
# makes this absolute via `abspath`).
path::String
# Read permission flag. Stored here, but not consulted by any code in this file.
read::Bool
# Write permission flag; this is what `iswriteable(root)` reports.
write::Bool
end
# Convenience constructor: roots are read-only unless `write=true` is given,
# and `path` is made absolute before it is stored.
function FileSystemRoot(path::AbstractString; write=false, read=true)
    return FileSystemRoot(abspath(path), read, write)
end
# A `FileSystemRoot` is writeable exactly when it was constructed with `write=true`.
function iswriteable(root::FileSystemRoot)
    return root.write
end

# The filesystem location of a `FileSystemRoot` is the path it stores.
function sys_abspath(root::FileSystemRoot)
    return root.path
end
# Deprecated: anchor a relative logical path at the current working directory,
# using a readable and writeable `FileSystemRoot`. A deprecation warning is
# issued on every call path that reaches here.
function Base.abspath(relpath::RelPath)
    Base.depwarn("""
        `abspath(::RelPath)` defaults to using `pwd()` as the root of the path
        but this leads to fragile code so will be removed in the future""",
        :abspath)
    cwd_root = FileSystemRoot(pwd(); write=true, read=true)
    return AbsPath(cwd_root, relpath)
end
#-------------------------------------------------------------------------------
# Infrastructure for a somewhat more functional interface for creating file
# trees than the fully mutable version we usually use.
# Root for temporary files/directories. `path` is the temporary location on
# disk, or `nothing` when the root no longer owns any data (for example after
# the data has been moved elsewhere).
mutable struct TempFilesystemRoot <: AbstractFileSystemRoot
    path::Union{Nothing,String}

    function TempFilesystemRoot(path)
        tmproot = new(path)
        # When the root is garbage collected, delete whatever it still owns.
        finalizer(tmproot) do r
            isnothing(r.path) || rm(r.path, recursive=true, force=true)
        end
        return tmproot
    end
end
# List the entries at logical `path` under a temporary root.
#
# A root whose `path` is `nothing` no longer owns any data, so it is reported
# as empty. The empty result is a `String[]` so that both branches return a
# `Vector{String}`, matching what `readdir` produces for a real directory
# (the original returned an untyped `[]`, i.e. a `Vector{Any}`).
function Base.readdir(root::TempFilesystemRoot, path::RelPath)
    return isnothing(root.path) ? String[] : readdir(sys_abspath(root, path))
end
# Temporary roots always accept writes.
function iswriteable(root::TempFilesystemRoot)
    return true
end

# The filesystem location of a temporary root is its stored `path`; this is
# `nothing` once the root has given up its data.
function sys_abspath(root::TempFilesystemRoot)
    return root.path
end
"""
newdir()
Create a new temporary `BlobTree` which can have files assigned into it and may
be assigned to a permanent location in a persistent `BlobTree`. If not assigned
to a permanent location, the temporary tree is cleaned up during garbage
collection.
"""
function newdir(ctx::AbstractFileSystemRoot=FileSystemRoot(tempdir(), write=true))
    # The temporary directory is created inside the location of `ctx`.
    parent = sys_abspath(ctx)
    # cleanup=false: deletion is handled by the TempFilesystemRoot finalizer
    # rather than by Julia's process-exit cleanup.
    tmproot = TempFilesystemRoot(mktempdir(parent, cleanup=false))
    return BlobTree(tmproot)
end

# Given a tree, create the temporary directory relative to that tree's root.
function newdir(ctx::BlobTree)
    return newdir(ctx.root)
end
# Create a new, empty temporary file inside the location of `ctx` and return
# it as a `Blob` rooted at a `TempFilesystemRoot`.
function newfile(ctx::AbstractFileSystemRoot=FileSystemRoot(tempdir(), write=true))
    # cleanup=false: deletion is handled by the TempFilesystemRoot finalizer.
    tmppath, stream = mktemp(sys_abspath(ctx), cleanup=false)
    # Only the file itself is wanted; the stream from mktemp is not needed.
    close(stream)
    return Blob(TempFilesystemRoot(tmppath))
end

# Given a tree, create the temporary file relative to that tree's root.
function newfile(ctx::BlobTree)
    return newfile(ctx.root)
end
"""
newfile(func)
newfile(func, ctx)
Create a new temporary `Blob` object which may be later assigned to a permanent
location in a `BlobTree`. If not assigned to a permanent location, the
temporary file is cleaned up during garbage collection.
# Example
```
tree[path"some/demo/path.txt"] = newfile() do io
println(io, "Hi there!")
end
```
"""
function newfile(f::Function, ctx=FileSystemRoot(tempdir(), write=true))
    # cleanup=false: deletion is handled by the TempFilesystemRoot finalizer.
    path, io = mktemp(sys_abspath(ctx), cleanup=false)
    try
        try
            f(io)
        finally
            # Always close the stream first, so that the file is no longer
            # held open if it has to be deleted below.
            close(io)
        end
    catch
        # `f` (or flushing on close) failed: the half-written temporary file
        # is of no use. force=true keeps an already-missing file from raising
        # a second error which would hide the original one.
        rm(path; force=true)
        rethrow()
    end
    return Blob(TempFilesystemRoot(path))
end
# Move srcpath to destpath, making all attempts to preserve the original
# content of `destpath` if anything goes wrong. We assume that `srcpath` is
# temporary content which doesn't need to be protected.
#
# Arguments:
# * `srcpath`        - path of the data to move
# * `destpath`       - path the data should end up at; may already exist
# * `tempdir_parent` - existing directory in which a temporary holding
#                      directory is created when `destpath` already exists
#
# Returns `nothing`. If the move fails, the previous content of `destpath` is
# moved back, the holding directory is removed, and the original error is
# rethrown. If moving the content back fails as well, an error naming the
# location of the preserved data is thrown instead.
function mv_force_with_dest_rollback(srcpath, destpath, tempdir_parent)
    holding_area = nothing
    held_path = nothing
    if ispath(destpath)
        # If the destination path exists, improve the atomic nature of the
        # update by first moving existing data to a temporary directory.
        holding_area = mktempdir(tempdir_parent, prefix="jl_to_remove_", cleanup=false)
        name = basename(destpath)
        held_path = joinpath(holding_area, name)
        mv(destpath, held_path)
    end
    try
        mv(srcpath, destpath)
    catch
        if !isnothing(holding_area)
            try
                # Attempt to put things back as they were!
                mv(held_path, destpath)
            catch
                # At this point we've tried our best to preserve the user's data
                # but something has gone wrong, likely at the OS level. The user
                # will have to clean up manually if possible.
                error("""
                Something went wrong while moving data to path $destpath.
                We tried restoring the original data to $destpath, but were
                met with another error. The original data is preserved in
                $held_path
                See the catch stack for the root cause.
                """)
            end
            # The rollback succeeded, so the holding area no longer contains
            # anything of value. Remove it rather than leaving an empty
            # `jl_to_remove_*` directory behind. This is best effort only: a
            # failure here must not replace the error we are about to rethrow.
            try
                rm(holding_area, recursive=true, force=true)
            catch
                @warn "Unable to remove temporary holding area" holding_area
            end
        end
        rethrow()
    end
    if !isnothing(holding_area)
        # If we get to here, it's safe to remove the holding area
        rm(holding_area, recursive=true)
    end
    return nothing
end
# Move temporary data (created with `newfile`/`newdir`) into `tree` under
# `name`, replacing anything which already exists there.
#
# Ownership of the data on disk is transferred to `tree`: afterwards `tmpdata`
# no longer refers to anything (its root path is set to `nothing`).
#
# Throws an error if
# * `tree` is read-only,
# * `tmpdata` has already been moved somewhere, or
# * `tmpdata` is only a part of a temporary tree rather than the whole of it.
#
# Returns `tree`.
function Base.setindex!(tree::BlobTree{<:AbstractFileSystemRoot},
                        tmpdata::Union{Blob{TempFilesystemRoot},BlobTree{TempFilesystemRoot}},
                        name::AbstractString)
    if !iswriteable(tree.root)
        error("Attempt to move to a read-only tree $tree")
    end
    if isnothing(tmpdata.root.path)
        # The backing path is gone, so describe the data by its type rather
        # than by asking about a location which no longer exists.
        type = tmpdata isa BlobTree ? "directory" : "file"
        error("Attempted to root a temporary $type which has already been moved to $(tree.path)/$name ")
    end
    if !isempty(tmpdata.path)
        # Eh, the number of ways the user can misuse this isn't really funny :-/
        #
        # This guard is about the *temporary* data: the whole temporary root
        # is disowned below, so moving only a sub-path of it would orphan the
        # remainder. (The original tested the destination `tree.path`, which
        # rejected moves into nested destination trees and let partial
        # temporary trees through.)
        error("Temporary trees must be moved in full. The tree had non-empty path $(tmpdata.path)")
    end
    destpath = sys_abspath(joinpath(tree, name))
    srcpath = sys_abspath(tmpdata)
    # Any existing destination data is parked inside the destination tree's
    # own directory while the move takes place.
    tempdir_parent = sys_abspath(tree)
    mv_force_with_dest_rollback(srcpath, destpath, tempdir_parent)
    # Transfer ownership of the data to `tree`. This is ugly to be sure, as it
    # leaves `tmpdata` empty! However, we'll have to live with this wart unless
    # we want to be duplicating large amounts of data on disk.
    tmpdata.root.path = nothing
    return tree
end
# It's interesting to read about the linux VFS interface in regards to how the
# OS actually represents these things. For example
# https://stackoverflow.com/questions/36144807/why-does-linux-use-getdents-on-directories-instead-of-read
#--------------------------------------------------
# Filesystem storage driver
# Storage driver entry point: build the storage object described by `config`
# and call `f` with it, returning the result of `f`.
#
# `config["type"]` selects between a single file ("Blob") and a directory
# ("BlobTree") located at `config["path"]`. For a "BlobTree", a path fragment
# obtained from `dataset` (if there is one) selects an entry within the tree.
# Throws `ArgumentError` if the path is not of the expected kind or the type
# is not recognized.
function connect_filesystem(f, config, dataset)
    path = config["path"]
    type = config["type"]
    storage = if type == "Blob"
        if !isfile(path)
            throw(ArgumentError("$(repr(path)) should be a file"))
        end
        Blob(FileSystemRoot(path))
    elseif type == "BlobTree"
        if !isdir(path)
            throw(ArgumentError("$(repr(path)) should be a directory"))
        end
        tree = BlobTree(FileSystemRoot(path))
        fragment = dataspec_fragment_as_path(dataset)
        isnothing(fragment) ? tree : tree[fragment]
    else
        throw(ArgumentError("DataSet type $type not supported on the filesystem"))
    end
    return f(storage)
end
# Register `connect_filesystem` under the driver name "FileSystem", the name
# used by the `driver="FileSystem"` entries in the metadata spec above.
add_storage_driver("FileSystem"=>connect_filesystem)