@@ -3,6 +3,7 @@ module DataSets
3
3
using UUIDs
4
4
using TOML
5
5
using SHA
6
+ using Contexts
6
7
7
8
export DataSet, dataset, @datafunc , @datarun
8
9
export Blob, BlobTree, newfile, newdir
@@ -516,41 +517,150 @@ function Base.open(f::Function, as_type, dataset::DataSet)
516
517
end
517
518
end
518
519
519
- # For convenience, this non-scoped open() just returns the data handle as
520
- # opened. See check_scoped_open for a way to help users avoid errors when using
521
- # this (ie, if `identity` is not a valid argument to open() because resources
522
- # would be closed before it returns).
520
+ # Option 1
523
521
#
524
- # FIXME : Consider removing this. It should likely be replaced with `load()`, in
525
- # analogy to FileIO.jl's load operation:
526
- # * `load()` is "load the entire file into memory as such-and-such type"
527
- # * `open()` is "open this resource, and run some function while it's open"
528
- Base. open (as_type, conf:: DataSet ) = open (identity, as_type, conf)
522
+ # Attempt to use finalizers and the "async trick" to recover the resource from
523
+ # inside a do block.
524
+ #
525
+ # Problems:
526
+ # * The async stack may contain a reference to the resource, so it may never
527
+ # get finalized.
528
+ # * The returned resource must be mutable to attach a finalizer to it.
529
529
530
- """
531
- check_scoped_open(func, as_type)
530
+ #=
531
+ function Base.open(as_type, dataset::DataSet)
532
+ storage_config = dataset.storage
533
+ driver = _drivers[storage_config["driver"]]
534
+ ch = Channel()
535
+ @async try
536
+ driver(storage_config, dataset) do storage
537
+ open(as_type, storage) do x
538
+ put!(ch, x)
539
+ if needs_finalizer(x)
540
+ ch2 = Channel(1)
541
+ finalizer(x) do _
542
+ put!(ch2, true)
543
+ end
544
+ # This is pretty delicate — it won't work if the compiler
545
+ # retains a reference to `x` due to its existence in local
546
+ # scope.
547
+ #
548
+ # Likely, it depends on
549
+ # - inlining the current closure into open()
550
+ # - Assuming that the implementation of open() itself
551
+ # doesn't keep a reference to x.
552
+ # - Assuming that the compiler (or interpreter) doesn't
553
+ # keep any stray references elsewhere.
554
+ #
555
+ # In all, it seems like this can't be reliable in general.
556
+ x = nothing
557
+ take!(ch2)
558
+ end
559
+ end
560
+ end
561
+ @info "Done"
562
+ catch exc
563
+ put!(ch, exc)
564
+ end
565
+ y = take!(ch)
566
+ if y isa Exception
567
+ throw(y)
568
+ end
569
+ y
570
+ end
532
571
533
- Call `check_scoped_open(func, as_type) in your implementation of `open(func,
534
- as_type, data)` if you clean up or `close()` resources by the time `open()`
535
- returns.
572
+ needs_finalizer(x) = false
573
+ needs_finalizer(x::IO) = true
536
574
537
- That is, if the unscoped form `use(open(AsType, data))` is invalid and the
538
- following scoped form required:
575
+ =#
539
576
540
- ```
541
- open(AsType, data) do x
542
- use(x)
577
+ # Option 2
578
+ #
579
+ # Attempt to return both the object-of-interest as well as a resource handle
580
+ # from open()
581
+ #
582
+ # Problem:
583
+ # * The user doesn't care about the handle! Now they've got to unpack it...
584
+
585
+ #=
586
+ struct NullResource
543
587
end
544
- ```
545
588
546
- The dicotomy of resource handling techniques in `open()` are due to an
547
- unresolved language design problem of how resource handling and cleanup should
548
- work (see https://github.com/JuliaLang/julia/issues/7721).
549
- """
550
- check_scoped_open (func, as_type) = nothing
589
+ Base.close(rc::NullResource) = nothing
590
+ Base.wait(rc::NullResource) = nothing
591
+
592
+ struct ResourceControl
593
+ cond::Threads.Condition
594
+ end
595
+
596
+ ResourceControl() = ResourceControl(Threads.Condition())
597
+
598
+ Base.close(rc::ResourceControl) = lock(()->notify(rc.cond), rc.cond)
599
+ Base.wait(rc::ResourceControl) = lock(()->wait(rc.cond), rc.cond)
600
+
601
+ function _open(as_type, dataset::DataSet)
602
+ storage_config = dataset.storage
603
+ driver = _drivers[storage_config["driver"]]
604
+ ch = Channel()
605
+ @async try
606
+ driver(storage_config, dataset) do storage
607
+ open(as_type, storage) do x
608
+ rc = needs_finalizer(x) ? ResourceControl() : NullResource()
609
+ put!(ch, (x,rc))
610
+ wait(rc)
611
+ end
612
+ end
613
+ @info "Done"
614
+ catch exc
615
+ put!(ch, exc)
616
+ end
617
+ y = take!(ch)
618
+ if y isa Exception
619
+ throw(y)
620
+ end
621
+ y
622
+ end
623
+
624
+
625
+ y = open(foo)!
626
+
627
+ # ... means
628
+
629
+ y,z = open_(foo)
630
+ finalizer(_->close(z), y)
631
+ =#
632
+
633
+ # Option 3:
634
+ #
635
+ # Context-passing as in Contexts.jl
636
+ #
637
+ # Looks pretty good!
638
+
639
+ @! function Base. open (dataset:: DataSet )
640
+ storage_config = dataset. storage
641
+ driver = _drivers[storage_config[" driver" ]]
642
+ # Use `enter_do` because drivers don't yet use the Contexts.jl mechanism
643
+ (storage,) = @! enter_do (driver, storage_config, dataset)
644
+ storage
645
+ end
646
+
647
+ @! function Base. open (as_type, dataset:: DataSet )
648
+ storage = @! open (dataset)
649
+ @! open (as_type, storage)
650
+ end
651
+
652
+ # Option 4:
653
+ #
654
+ # Deprecate open() and only supply load()
655
+ #
656
+ # Problems:
657
+ # * Not exactly clear where one ends and the other begins.
658
+
659
+ # All this, in order to deprecate the following function:
551
660
552
- function check_scoped_open (func:: typeof (identity), as_type)
553
- throw (ArgumentError (" You must use the scoped form `open(your_function, AsType, data)` to open as type $as_type " ))
661
+ function Base. open (as_type, conf:: DataSet )
662
+ Base. depwarn (" `open(as_type, dataset::DataSet)` is deprecated. Use @! open(as_type, dataset) instead" , :open )
663
+ @! open (as_type, conf)
554
664
end
555
665
556
666
# Application entry points
0 commit comments