-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathentrypoint.jl
142 lines (126 loc) · 4.45 KB
/
entrypoint.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#-------------------------------------------------------------------------------
# Entry point utilities
#
# These make it easy for users to open `DataSet`s and map them into types
# understood by their program.
"""
    extract_dtypes(call)

Split the arguments of the call expression `call` into their components.

Every argument after the function name must be written as `name::DT => T`.
Returns the tuple `(argnames, dtypes, jtypes)` of three vectors which hold, for
each argument in order, the `name`, `DT` and `T` sub-expressions.

Throws an `ArgumentError` naming the offending argument if one of them does not
have that form.
"""
function extract_dtypes(call)
    argnames = Any[]
    dtypes = Any[]
    jtypes = Any[]
    for ex in call.args[2:end]
        # `name::DT => T` parses as `Expr(:call, :(=>), :(name::DT), T)`.
        if !(ex isa Expr && ex.head == :call && length(ex.args) >= 3 &&
             ex.args[1] == :(=>))
            throw(ArgumentError(
                "expected an argument of the form `name::DT => T`, got `$ex`"))
        end
        lhs = ex.args[2]
        if !(lhs isa Expr && lhs.head == :(::) && length(lhs.args) == 2)
            throw(ArgumentError(
                "expected `name::DT` on the left of `=>`, got `$lhs` in `$ex`"))
        end
        push!(argnames, lhs.args[1])
        push!(dtypes, lhs.args[2])
        push!(jtypes, ex.args[3])
    end
    return argnames, dtypes, jtypes
end
"""
    @datafunc function f(x::DT=>T, y::DS=>S...)
        ...
    end

Define the function `f(x::T, y::S, ...)` and add data dispatch rules so that
`f(x::DataSet, y::DataSet)` will open datasets matching dataset types `DT,DS`
as Julia types `T,S`.

The first expansion for a given `f` (detected by the dispatch table not yet
being defined at the call site) also defines the method `f(ds::DataSet...)`,
which forwards to `_run`, together with the constant dispatch table itself.
Every expansion adds one entry to that table, mapping the tuple of dataset type
names (as strings) to the tuple of Julia types.

Throws an `ArgumentError` unless the argument is a long-form `function`
definition whose signature is a plain call such as `f(x::DT=>T)`.
"""
macro datafunc(func_expr)
    if !(func_expr isa Expr && func_expr.head == :function)
        throw(ArgumentError("@datafunc expects a `function ... end` definition"))
    end
    call = func_expr.args[1]
    if !(call isa Expr && call.head == :call)
        throw(ArgumentError(
            "@datafunc expects a signature of the form `f(x::DT=>T, ...)`, got `$call`"))
    end
    funcname = call.args[1]
    argnames, dtypes, jtypes = extract_dtypes(call)
    real_args = [:($n::$t) for (n, t) in zip(argnames, jtypes)]
    # The leading `#` gives a name which cannot be written as a plain identifier.
    table_name = Symbol("#_$(funcname)_datasets_dispatch")
    esc_funcname = esc(funcname)
    esc_table_name = esc(table_name)
    # Rewrite the signature in place from `x::DT=>T` to plain `x::T`.
    func_expr.args[1].args[2:end] = real_args
    quote
        if !$(esc(:(@isdefined($table_name))))
            function $esc_funcname(ds::DataSet...)
                _run($esc_funcname, $esc_table_name, ds...)
            end
            const $esc_table_name = Dict()
        end
        push!($esc_table_name, tuple($(map(string, dtypes)...)) =>
                               tuple($(map(esc, jtypes)...)))
        $(esc(func_expr))
    end
end
"""
    datarun(proj, func, data_names...)

Look up each of `data_names` in `proj` with `dataset` and call `func` with the
results, in the same order, as positional arguments. Returns whatever `func`
returns.
"""
function datarun(proj::AbstractDataProject, func::Function, data_names::AbstractString...)
    datasets = map(data_names) do name
        dataset(proj, name)
    end
    return func(datasets...)
end
"""
    @datarun [proj] func(args...)

Run `func` with the named `DataSet`s from the list `args`.

The macro expands to `datarun(proj, func, args...)`. When `proj` is omitted the
expansion refers to `PROJECT` instead; that name is not escaped, so it is
resolved where the macro is defined rather than at the call site.

# Example

Load `DataSet`s named a,b as defined in Data.toml, and pass them to `f()`.
```
proj = DataSets.load_project("Data.toml")
@datarun proj f("a", "b")
```
"""
macro datarun(args...)
    nargs = length(args)
    if !(nargs == 1 || nargs == 2)
        throw(ArgumentError("@datarun macro expects one or two arguments"))
    end
    # The call expression is always the last argument; an explicit project, if
    # given, comes first.
    call = args[nargs]
    esc_proj = nargs == 2 ? esc(args[1]) : :PROJECT
    esc_funcname = esc(call.args[1])
    esc_funcargs = map(esc, call.args[2:end])
    quote
        datarun($esc_proj, $esc_funcname, $(esc_funcargs...))
    end
end
"""
    dataset_type(dataset)

Get a string representation of the "DataSet type", which represents the type of
the data *outside* Julia.

A given DataSet type may be mapped into many different Julia types. For example
consider the "Blob" type which is an array of bytes (commonly held in a file).
When loaded into Julia, this may be represented as a

* `IO` — via `open()`
* `String` — via `open() |> read(_,String)`
* `Vector{UInt8}` — via mmap
* Path

The value is currently read from the `"type"` entry of the dataset's `storage`.
"""
function dataset_type(d::DataSet)
    # TODO: Enhance this once maps can be applied on top of the storage layer
    # Should we use MIME type? What about layering?
    storage = d.storage
    return storage["type"]
end
# Open every `(dataset, T)` pair in turn with `open(T, dataset) do ... end`,
# nesting the scopes so that all of them are still open when `func` finally
# runs. `opened` accumulates the values yielded so far, in order.
function _openall(func, opened, next, to_open...)
    dataset, T = next
    return open(T, dataset) do resource
        _openall(func, (opened..., resource), to_open...)
    end
end

# Base case: nothing is left to open, so call `func` on everything collected.
_openall(func, opened) = func(opened...)
# Look up the `dataset_type` of every element of `ds` in `dispatch_table` to
# find the Julia types they should be opened as, then call `func` on the opened
# data. Throws an `ArgumentError` listing the table's keys when there is no
# entry for this combination of dataset types.
function _run(func, dispatch_table, ds::DataSet...)
    # Matching is a deliberately simplistic exact lookup for now. Julia's own
    # dispatch is not used here because
    #   a) creating a pile of tag types only to match some strings seems
    #      wasteful, and
    #   b) it seems like a good idea to keep the declarative "data typesystem"
    #      (implicitly defined outside Julia) separate from Julia's type system
    #      and dispatch rules.
    dtypes = map(dataset_type, ds)
    if haskey(dispatch_table, dtypes)
        julia_types = dispatch_table[dtypes]
        return _openall(func, (), Pair.(ds, julia_types)...)
    end
    table = join((string(key) for key in keys(dispatch_table)), "\n")
    throw(ArgumentError("""No matching function $func for DataSet types $dtypes.
        The types must match one of the following:
        $table
        """))
end