From 66feea6738c0afead0b527ae1428792b5652994f Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Thu, 7 May 2020 20:20:31 -0600 Subject: [PATCH 01/15] Drop support for ChebHash. Remove ChebHash from the package in favor of better support for MonteCarloHash. Fixes #26. --- Manifest.toml | 36 ---- Project.toml | 2 - docs/src/full_api.md | 1 - docs/src/function_hashing.md | 42 +---- src/LSHFunctions.jl | 3 +- src/function_hashing/chebhash.jl | 224 ------------------------- src/function_hashing/monte_carlo.jl | 2 - test/function_hashing/test_chebhash.jl | 157 ----------------- test/runtests.jl | 1 - 9 files changed, 6 insertions(+), 462 deletions(-) delete mode 100644 src/function_hashing/chebhash.jl delete mode 100644 test/function_hashing/test_chebhash.jl diff --git a/Manifest.toml b/Manifest.toml index 5bf4f88..2365ae3 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,11 +1,5 @@ # This file is machine-generated - editing it directly is not advised -[[AbstractFFTs]] -deps = ["LinearAlgebra"] -git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716" -uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" -version = "0.5.0" - [[Arpack]] deps = ["Arpack_jll", "Libdl", "LinearAlgebra"] git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9" @@ -64,30 +58,12 @@ git-tree-sha1 = "f3464968c65fc78846dad1c038c474a2c39bbb23" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" version = "0.25.0" -[[FFTW]] -deps = ["AbstractFFTs", "FFTW_jll", "IntelOpenMP_jll", "Libdl", "LinearAlgebra", "MKL_jll", "Reexport"] -git-tree-sha1 = "14536c95939aadcee44014728a459d2fe3ca9acf" -uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" -version = "1.2.2" - -[[FFTW_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "6c975cd606128d45d1df432fb812d6eb10fee00b" -uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" -version = "3.3.9+5" - [[FillArrays]] deps = ["LinearAlgebra", "Random", "SparseArrays"] git-tree-sha1 = "bf726ba7ce99e00d10bf63c031285fb9ab3676ae" uuid = 
"1a297f60-69ca-5386-bcde-b61e274b549b" version = "0.8.11" -[[IntelOpenMP_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "fb8e1c7a5594ba56f9011310790e03b5384998d6" -uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" -version = "2018.0.3+0" - [[InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" @@ -112,12 +88,6 @@ uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" -[[MKL_jll]] -deps = ["IntelOpenMP_jll", "Libdl", "Pkg"] -git-tree-sha1 = "0ce9a7fa68c70cf83c49d05d2c04d91b47404b08" -uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" -version = "2020.1.216+0" - [[Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" @@ -182,12 +152,6 @@ uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" deps = ["Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -[[Reexport]] -deps = ["Pkg"] -git-tree-sha1 = "7b1d07f411bc8ddb7977ec7f377b97b158514fe0" -uuid = "189a3867-3050-52da-a836-e630ba90ab69" -version = "0.2.0" - [[Rmath]] deps = ["Random", "Rmath_jll"] git-tree-sha1 = "86c5647b565873641538d8f812c04e4c9dbeb370" diff --git a/Project.toml b/Project.toml index e1faee6..383fe8a 100644 --- a/Project.toml +++ b/Project.toml @@ -6,7 +6,6 @@ version = "0.1.2" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" @@ -19,6 +18,5 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] Distributions = "0.22, 0.23" Documenter = "0.24, 0.25" -FFTW = "1.2" QuadGK = "2.3" julia = "1.3, 1.4, 1.5" diff --git a/docs/src/full_api.md b/docs/src/full_api.md index eb5e8dc..91c4b5d 100644 --- a/docs/src/full_api.md +++ b/docs/src/full_api.md @@ -38,7 +38,6 @@ Pages = ["similarities.jl"] ```@docs MonteCarloHash -ChebHash ``` ## Miscellaneous diff 
--git a/docs/src/function_hashing.md b/docs/src/function_hashing.md index a869b7f..c6f0f02 100644 --- a/docs/src/function_hashing.md +++ b/docs/src/function_hashing.md @@ -22,6 +22,7 @@ julia> hashfn(x -> 5x^3 - 2x^2 - 9x + 1) LSHFunctions can hash functions in any [``L^p_{\mu}(\Omega)`` function space](https://en.wikipedia.org/wiki/Lp_space) so long as ``\Omega`` has finite volume (i.e., as long as ``\int_{\Omega} d\mu(x) < +\infty``). ## Similarity statistics in function spaces + The LSHFunctions module currently supports hashing for the following similarity statistics in function spaces. ### ``L_{\mu}^p`` distance @@ -39,49 +40,15 @@ The LSHFunctions module currently supports hashing for the following similarity When ``f`` and ``g`` are allowed to take on complex values, ``g(x)`` is replaced by ``\overline{g(x)}`` (the complex conjugate of ``g(x)``) in the formula above. ### Cosine similarity + ```math \text{cossim}(f,g) = \frac{\left\langle f,g\right\rangle_{L_{\mu}^2}}{\|f\|_{L_{\mu}^2} \cdot \|g\|_{L_{\mu}^2}} ``` -## Function approximation-based hashing +## Monte Carlo-based hashing !!! warning "API subject to change" - The API for both [`ChebHash`](@ref) and [`MonteCarloHash`](@ref), but especially the former, is being modified very quickly. As a result, the docs below may change radically for future versions of the LSHFunctions package. - -Create a hash function for cosine similarity for functions in ``L^2([-1,1])``: - -``` -julia> hashfn = ChebHash(cossim, 50; interval=@interval(-1 ≤ x ≤ 1)); - -julia> n_hashes(hashfn) -50 - -julia> similarity(hashfn) == cossim -true - -julia> hashtype(hashfn) -Bool -``` - -Create a hash function for ``L^2`` distance defined over ``L^2([0,2\pi])``. 
Hash the functions `f(x) = cos(x)` and `f(x) = x/(2π)` using the returned [`ChebHash`](@ref): - -``` -julia> hashfn = ChebHash(L2, 3; interval=@interval(0 ≤ x ≤ 2π)); - -julia> hashfn(cos) -3-element Array{Int32,1}: - 3 - -1 - -2 - -julia> hashfn(x -> x/(2π)) -3-element Array{Int32,1}: - 0 - 1 - 0 -``` - -## Monte Carlo-based hashing + The API for [`MonteCarloHash`](@ref) is still under heavy design. As a result, the docs below may change radically for future versions of the LSHFunctions package. Create a hash function for cosine similarity for functions in ``L^2([-1,1])``: @@ -132,4 +99,5 @@ julia> length(hashfn.sample_points) ``` ## References + - Shand, William and Becker, Stephen. *Locality-sensitive hashing in function spaces*. [arXiv:2002.03909](https://arxiv.org/abs/2002.03909). diff --git a/src/LSHFunctions.jl b/src/LSHFunctions.jl index 992baae..2d7a431 100644 --- a/src/LSHFunctions.jl +++ b/src/LSHFunctions.jl @@ -30,7 +30,6 @@ include(joinpath("hashes", "lshfunction.jl")) Function hashing ========================# -include(joinpath("function_hashing", "chebhash.jl")) include(joinpath("function_hashing", "monte_carlo.jl")) #======================== @@ -43,7 +42,7 @@ export cossim, inner_prod, ℓ1, ℓ2, ℓp, L1, L2, Lp, ℓ1_norm, ℓ2_norm, # Hash functions export SimHash, L1Hash, L2Hash, MIPSHash, SignALSH, MinHash, - LSHFunction, MonteCarloHash, ChebHash, SymmetricLSHFunction, + LSHFunction, MonteCarloHash, SymmetricLSHFunction, AsymmetricLSHFunction # Helper / utility functions for LSHFunctions diff --git a/src/function_hashing/chebhash.jl b/src/function_hashing/chebhash.jl deleted file mode 100644 index 184316e..0000000 --- a/src/function_hashing/chebhash.jl +++ /dev/null @@ -1,224 +0,0 @@ -#================================================================ - -ChebHash for hashing the L^2([-1,1]) function space. 
- -================================================================# - -using FFTW -using Logging - -#======================== -Global constants -========================# - -const _DEFAULT_CHEBHASH_INTERVAL = @interval(-1 ≤ x ≤ 1) - -#======================== -Typedefs -========================# - -# B = basis, which is a Symbol (e.g. :Chebyshev) -struct ChebHash{B, F<:SimilarityFunction, H<:LSHFunction, I<:RealInterval} - # Discrete-space hash function used after extracting Chebyshev polynomial - # coefficients from the input function. - discrete_hashfn :: H - - # Interval over which all input functions are defined. - interval :: I - - ### Internal ChebHash constructors - function ChebHash{B,F}( - hashfn::H, - interval::I - ) where {B, F<:SimilarityFunction, H<:LSHFunction, I<:RealInterval} - - @warn "ChebHash is deprecated. Starting in version 0.2.0 ChebHash will no longer be available." - new{B,F,H,I}(hashfn, interval) - end -end - -### External ChebHash constructors -const _valid_ChebHash_similarities = ( - # Function space similarities - (L2, cossim), - # Discrete-space similarities corresponding to function space similarities - (ℓ2, cossim), -) - -@doc """ - ChebHash(sim, args...; interval=$(_DEFAULT_CHEBHASH_INTERVAL), kws...) - -Samples a hash function from an LSH family for the similarity `sim` defined over the function space ``L^p_{\\mu}(\\Omega)``. `sim` may be one of the following: -$( -join( - ["- `" * sim * "`" for sim in (_valid_ChebHash_similarities[1] .|> - string |> - collect |> - sort!) - ], - "\n" -) -) - -`ChebHash` works by approximating a function by Chebyshev polynomials. You can choose the degree of the approximation to trade between speed and generating desirable hash collision probabilities. - -!!! info "ChebHash limitations" - `ChebHash` can only hash function spaces of the form ``L^2([a,b])``, where ``[a,b]`` is an interval on the real line. For a more versatile option, checkout out [`MonteCarloHash`](@ref). 
- -# Arguments -- `sim`: the similarity function you want to hash on. -- `args...`: arguments to pass on when building the `LSHFunction` instance underlying the returned `ChebHash` struct. -- `kws...`: keyword arguments to pass on when building the `LSHFunction` instance underlying the returned `ChebHash` struct. - -# Examples -Create a hash function for cosine similarity for functions in ``L^2([-1,1])``: - -``` -julia> hashfn = ChebHash(cossim, 50; interval=@interval(-1 ≤ x ≤ 1)); - -julia> n_hashes(hashfn) -50 - -julia> similarity(hashfn) == cossim -true - -julia> hashtype(hashfn) -$(cossim |> LSHFunction |> hashtype) -``` - -Create a hash function for ``L^2`` distance defined over ``L^2([0,2\\pi])``. Hash the functions `f(x) = cos(x)` and `f(x) = x/(2π)` using the returned `ChebHash`: - -``` -julia> hashfn = ChebHash(L2, 3; interval=@interval(0 ≤ x ≤ 2π)); - -julia> hashfn(cos) -3-element Array{Int32,1}: - 3 - -1 - -2 - -julia> hashfn(x -> x/(2π)) -3-element Array{Int32,1}: - 0 - 1 - 0 -``` - -See also: [`MonteCarloHash`](@ref) -""" -ChebHash(similarity, args...; kws...) = - ChebHash(SimilarityFunction(similarity), args...; kws...) - -for (fn_sim, discrete_sim) in zip(_valid_ChebHash_similarities...) - quote - # Add an implementation of ChebHash that dispatches on the similarity - # function fn_sim - function ChebHash(sim::SimilarityFunction{$fn_sim}, - args...; - interval::RealInterval = _DEFAULT_CHEBHASH_INTERVAL, - kws...) where S - - discrete_hashfn = LSHFunction($discrete_sim, args...; kws...) - ChebHash{:Chebyshev,typeof(sim)}(discrete_hashfn, interval) - end - end |> eval -end - -# Implementation of ChebHash for invalid similarity functions. Just throws -# a error. -# Necessary because otherwise the first external ChebHash constructor -# will go into infinite recursion if it receives an invalid similarity -# function. -function ChebHash(sim::SimilarityFunction, args...; kws...) 
- valid_sims = _valid_MonteCarloHash_similarities[1] .|> - string |> - collect |> - sort! - valid_sims = join(valid_sims, ", ", " or ") - ErrorException("similarity must be $(valid_sims)") |> throw -end - -#======================== -Helper functions for ChebHash -========================# - -# Perform an order-N Chebyshev discrete transform on samples of a function -# f(x) in order to approximate the coefficients for the degree-N Chebyshev -# polynomial of best fit for f(x). -function cheb_coefficients(f, N) - # Sample f(x) at the non-uniformly spaced nodes x[1], ..., x[N], where - # - # x[i] = cos((i-1)π / (N-1)) - # - x = cos.(range(0, π, length=N)) - fx = f.(x) - - dct(fx) * √(1/N) -end - -function get_cheb_coefficients(interval::RealInterval, f; n_coeffs::Integer=1024) - f_ = squash_function(interval, f) - coeff = cheb_coefficients(f_, n_coeffs) - coeff .* √width(interval) -end - -# Transform a function f ∈ L^2([a,b]) so that the coefficients of its Chebyshev -# polynomial create an approximate isomorphism between L^2([a,b]) and ℓ2(N). -function squash_function(interval::RealInterval{T}, f) where T - lower::T, upper::T = interval.lower, interval.upper - - α = (upper - lower) / π - β = lower - - x -> @. f(α * acos(x) + β) -end - -#======================== -LSHFunction API compliance -========================# - -n_hashes(hashfn::ChebHash) = - n_hashes(hashfn.discrete_hashfn) - -similarity(::ChebHash{T,SimilarityFunction{F}}) where {T,F} = F - -hashtype(hashfn::ChebHash) = - hashtype(hashfn.discrete_hashfn) - -# TODO: this may not be true -collision_probability(hashfn::ChebHash, args...; kws...) = - collision_probability(hashfn.discrete_hashfn, args...; kws...) - -#=============== -Hash computation -===============# - -function (hashfn::ChebHash{:Chebyshev})(f; kws...) - coeff = get_cheb_coefficients(hashfn.interval, f; kws...) - hashfn.discrete_hashfn(coeff) -end - -function index_hash(hashfn::ChebHash{:Chebyshev}, f; kws...) 
- coeff = get_cheb_coefficients(hashfn.interval, f; kws...) - index_hash(hashfn.discrete_hashfn, coeff) -end - -function query_hash(hashfn::ChebHash{:Chebyshev}, f) - coeff = get_cheb_coefficients(hashfn.interval, f; kws...) - query_hash(hashfn.discrete_hashfn, coeff) -end - -#======================== -ChebHash API -========================# - -# Compute the similarity between f and g in the embedded space -function embedded_similarity(hashfn::ChebHash{:Chebyshev}, f, g; kws...) - f_coeff = get_cheb_coefficients(hashfn.interval, f; kws...) - g_coeff = get_cheb_coefficients(hashfn.interval, g; kws...) - - simfun = similarity(hashfn) - - Lf, Lg = length(f_coeff), length(g_coeff) - @views simfun(f_coeff[1:min(Lf,Lg)], g_coeff[1:min(Lf,Lg)]) -end diff --git a/src/function_hashing/monte_carlo.jl b/src/function_hashing/monte_carlo.jl index a91b453..6e9c273 100644 --- a/src/function_hashing/monte_carlo.jl +++ b/src/function_hashing/monte_carlo.jl @@ -145,8 +145,6 @@ julia> hashfn = MonteCarloHash(cossim, μ; volume=1.0, n_samples=512); julia> length(hashfn.sample_points) 512 ``` - -See also: [`ChebHash`](@ref) """ MonteCarloHash(similarity, args...; kws...) = MonteCarloHash(SimilarityFunction(similarity), args...; kws...) diff --git a/test/function_hashing/test_chebhash.jl b/test/function_hashing/test_chebhash.jl deleted file mode 100644 index 5604cc2..0000000 --- a/test/function_hashing/test_chebhash.jl +++ /dev/null @@ -1,157 +0,0 @@ -using Test, Random, LSHFunctions -using LinearAlgebra: norm -using QuadGK - -include(joinpath("..", "utils.jl")) - -#======================== -Tests -========================# - -@testset "ChebHash tests" begin - Random.seed!(RANDOM_SEED) - - @testset "Construct ChebHash" begin - # Hash L^2([-1,1]) over cosine similarity. 
- hashfn = ChebHash(cossim, 5) - - @test similarity(hashfn) == cossim - @test n_hashes(hashfn) == 5 - @test hashtype(hashfn) == hashtype(LSHFunction(cossim)) - - # Hash L^2([-1,1]) over L^2 distance - hashfn = ChebHash(L2) - - @test n_hashes(hashfn) == 1 - @test similarity(hashfn) == L2 - @test hashtype(hashfn) == hashtype(LSHFunction(ℓ2)) - end - - @testset "Provide invalid similarity" begin - # When we pass in a similarity that is not supported by ChebHash - # we should receive an error. - @test_throws(ErrorException, ChebHash((x,y) -> abs(x-y))) - @test_throws(ErrorException, ChebHash(ℓ1)) - @test_throws(ErrorException, ChebHash(L1)) - @test_throws(ErrorException, ChebHash(ℓ2)) - - # Construct a hash function (with valid similarity) in the same - # manner as we did above in case the ChebHash API ever changes. - # This ensures that we won't forget to update these tests. - _ = ChebHash(L2) - end - - #========== - Cosine similarity hashing - ==========# - @testset "Hash cosine similarity (trivial inputs)" begin - ### Hash inputs with cosine similarity -1 - f(x) = sin(x) - g(x) = -f(x) - hashfn = ChebHash(cossim, 1024) - - @test embedded_similarity(hashfn, f, g) ≈ -1 - - hf, hg = hashfn(f), hashfn(g) - @test mean(hf .== hg) == 0 - - ### Hash inputs with cosine similarity 0 - # Note: f(x)g(x) = sin(x)cos(x) = 0.5 sin(2x), so that cosine - # similarity is zero (over the interval [-1,1]). 
- f(x) = sin(x) - g(x) = cos(x) - - @test isapprox(embedded_similarity(hashfn, f, g), 0; atol=1e-15) - - hf, hg = hashfn(f), hashfn(g) - @test 0.45 ≤ mean(hf .== hg) ≤ 0.55 - - ### Hash inputs with cosine similarity 1 - f(x) = sin(x) - g(x) = 2f(x) - - @test embedded_similarity(hashfn, f, g) ≈ 1 - - hf, hg = hashfn(f), hashfn(g) - @test mean(hf .== hg) == 1 - end - - @testset "Hash cosine similarity (nontrivial inputs)" begin - interval = LSHFunctions.@interval(-1.0 ≤ x ≤ 1.0) - hashfn = ChebHash(cossim, 1024; interval=interval) - - trig_function_test() = begin - f = ShiftedSine(π, π * rand()) - g = ShiftedSine(π, π * rand()) - - sim = cossim(f, g, interval) - hf, hg = hashfn(f), hashfn(g) - prob = collision_probability(hashfn, sim; n_hashes=1) - - prob - 0.05 ≤ mean(hf .== hg) ≤ prob + 0.05 - end - - # Dry-run: test on a single pair of inputs - @test trig_function_test() - - # Full test: run across many pairs of inputs - @test let success = true, ii = 1 - while ii ≤ 128 && success - success = success && trig_function_test() - ii += 1 - end - success - end - end - - #========== - L^2 distance hashing - ==========# - @testset "Hash L^2 distance (trivial inputs)" begin - ### Hash two functions with L^2 distance ≈ 0 - f(x) = 0.0 - g(x) = (-0.5 ≤ x ≤ 0.5) ? 1e-3 : 0.0 - hashfn = ChebHash(L2, 1024) - - @test embedded_similarity(hashfn, f, g) ≈ 1e-3 - - hf, hg = hashfn(f), hashfn(g) - @test mean(hf .== hg) ≥ 0.95 - - ### Hash two functions with large L^2 distance - g(x) = (-0.5 ≤ x ≤ 0.5) ? 
1e3 : 0.0 - - @test embedded_similarity(hashfn, f, g) ≈ 1e3 - - hf, hg = hashfn(f), hashfn(g) - @test mean(hf .== hg) ≤ 0.02 - end - - @testset "Hash L^2 distance (nontrivial inputs)" begin - interval = LSHFunctions.@interval(-1.0 ≤ x ≤ 1.0) - hashfn = ChebHash(L2, 1024; interval=interval) - - trig_function_test() = begin - f = ShiftedSine(π, π * rand()) - g = ShiftedSine(π, π * rand()) - - sim = L2(f, g, interval) - hf, hg = hashfn(f), hashfn(g) - prob = collision_probability(hashfn, sim; n_hashes=1) - - prob - 0.05 ≤ mean(hf .== hg) ≤ prob + 0.05 - end - - # Dry-run: test on a single pair of inputs - @test trig_function_test() - - # Full test: run across many pairs of inputs - @test let success = true, ii = 1 - while ii ≤ 128 && success - success = success && trig_function_test() - ii += 1 - end - success - end - end -end diff --git a/test/runtests.jl b/test/runtests.jl index a198c16..edfd793 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,6 +27,5 @@ include(joinpath("hashes", "test_sign_alsh.jl")) include(joinpath("hashes", "test_lshfunction.jl")) include(joinpath("function_hashing", "test_monte_carlo.jl")) -include(joinpath("function_hashing", "test_chebhash.jl")) include(joinpath("utils", "test_hash_compression.jl")) From 18f934d28f91a367ec1fb530c5bd3149b019c139 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Wed, 1 Jul 2020 04:15:16 -0600 Subject: [PATCH 02/15] Remove MIPSHash. Fixes #28. 
--- src/LSHFunctions.jl | 1 - src/hashes/mips_hash.jl | 307 ---------------------------------- test/hashes/test_mips_hash.jl | 244 --------------------------- test/runtests.jl | 1 - 4 files changed, 553 deletions(-) delete mode 100644 src/hashes/mips_hash.jl delete mode 100644 test/hashes/test_mips_hash.jl diff --git a/src/LSHFunctions.jl b/src/LSHFunctions.jl index 2d7a431..0c95055 100644 --- a/src/LSHFunctions.jl +++ b/src/LSHFunctions.jl @@ -19,7 +19,6 @@ Hash functions include(joinpath("hashes", "simhash.jl")) include(joinpath("hashes", "minhash.jl")) include(joinpath("hashes", "lphash.jl")) -include(joinpath("hashes", "mips_hash.jl")) include(joinpath("hashes", "sign_alsh.jl")) # Must be placed last, since it uses the definitions of LSHFunction subtypes diff --git a/src/hashes/mips_hash.jl b/src/hashes/mips_hash.jl deleted file mode 100644 index ad1eb8c..0000000 --- a/src/hashes/mips_hash.jl +++ /dev/null @@ -1,307 +0,0 @@ -#================================================================ - -Definition of MIPSHash for hashing on inner products. - -================================================================# - -using Logging - -#======================== -Typedefs -========================# - -mutable struct MIPSHash{T <: Union{Float32,Float64}} <: AsymmetricLSHFunction - coeff_A :: Matrix{T} - coeff_B :: Matrix{T} - scale :: T - shift :: Vector{T} - Qshift :: Vector{T} - m :: Int64 - - # An upper bound on the norm of the data points this hash function will - # process - maxnorm :: T - - # Whether or not the number of coefficients per hash function should be - # expanded to be a power of 2 whenever we need to resize coeff_A. 
- resize_pow2 :: Bool -end - -### External MIPSHash constructors - -@doc """ - MIPSHash(n_hashes::Integer = $(DEFAULT_N_HASHES); - dtype::Datatype = $(DEFAULT_DTYPE), - maxnorm::Union{Nothing,Real} = nothing, - scale::Real = 1, - m::Integer = 3, - resize_pow2::Bool = $(DEFAULT_RESIZE_POW2)) - -Create a `MIPSHash` hash function for hashing on inner product similarity. - -# Arguments -- $(N_HASHES_DOCSTR()) - -# Keyword parameters -- $(DTYPE_DOCSTR(MIPSHash)) -- `maxnorm::Union{Nothing,Real}` (default: `nothing`): an upper bound on the ``\\ell^2``-norm of the data points. - -!!! warning "Warning: maxnorm must be explicitly set" - The `maxnorm` keyword parameter must be explicitly specified. If it is left unspecified (or set to `nothing`), `MIPSHash()` will raise an error. - -- `scale::Real` (default: `1`): parameter that affects the probability of a hash collision. Large values of `scale` increases hash collision probability (even for inputs with low inner product similarity); small values of `scale` will decrease hash collision probability. - -# Examples -`MIPSHash` is an [`AsymmetricLSHFunction`](@ref), and hence hashes must be computed using `index_hash` and `query_hash`. - -``` -julia> hashfn = MIPSHash(5; maxnorm=10); - -julia> x = rand(4); - -julia> ih = index_hash(hashfn, x); qh = query_hash(hashfn, x); - -julia> length(ih) == length(qh) == 5 -true - -julia> typeof(ih) == typeof(qh) == Vector{Int32} -true -``` - -You need to explicitly specify the `maxnorm` keyword parameter when constructing `MIPSHash`, otherwise you will get an error. - -``` -julia> hashfn = MIPSHash(5) -ERROR: maxnorm must be specified for MIPSHash -``` - -You'll also get an error if you try to hash a vector that has norm greater than the `maxnorm` that you specified. - -``` -julia> hashfn = MIPSHash(; maxnorm=1); - -julia> index_hash(hashfn, ones(4)) -ERROR: norm 2.0 exceeds maxnorm (1.0) -``` - -# References -- Anshumali Shrivastava and Ping Li. 
*Asymmetric LSH (ALSH) for Sublinear Time Maximum Inner Product Search (MIPS)*. Proceedings of the 27th International Conference on Neural Information Processing Systems - Volume 2, NIPS'14, page 2321–2329, Cambridge, MA, USA, 2014. MIT Press. 10.5555/2969033.2969086. [arXiv:1410.5410](https://arxiv.org/abs/1405.5869) - -See also: [`inner_prod`](@ref), [`ℓ2_norm`](@ref ℓp_norm) -""" -@generated function MIPSHash{T}(n_hashes::Integer = DEFAULT_N_HASHES; - maxnorm::Union{Nothing,Real} = nothing, - scale::Real = 1, - m::Integer = 3, - resize_pow2::Bool = DEFAULT_RESIZE_POW2) where T - if maxnorm <: Nothing - :("maxnorm must be specified for MIPSHash" |> - ErrorException |> - throw) - else - quote - @warn "MIPSHash is deprecated. Starting in version 0.2.0 MIPSHash will no longer be available." - if n_hashes < 1 - "n_hashes must be positive" |> - ErrorException |> - throw - elseif scale ≤ 0 - "scaling factor `scale` must be positive" |> - ErrorException |> - throw - elseif m ≤ 0 - "m must be positive" |> - ErrorException |> - throw - elseif maxnorm ≤ 0 - "maxnorm must be positive" |> - ErrorException |> - throw - end - - coeff_A = Matrix{T}(undef, n_hashes, 0) - coeff_B = randn(T, n_hashes, m) - scale = T(scale) - m = Int64(m) - shift = rand(T, n_hashes) - Qshift = coeff_B * fill(T(1/2), m) ./ scale .+ shift - - MIPSHash{T}(coeff_A, coeff_B, scale, shift, Qshift, m, - maxnorm, resize_pow2) - end - end -end - -MIPSHash(args...; dtype=DEFAULT_DTYPE, kws...) = - MIPSHash{dtype}(args...; kws...) - -#============ -MIPSHash helper functions -=============# - -function Base.resize!(hashfn::MIPSHash{T}, n::Integer) where T - n = (hashfn.resize_pow2) ? nextpow(2, n) : n - - # The only field of MIPSHash that's dependent on the input size is coeff_A, - # so we only need to resize that array. 
- n_hashes, old_n = size(hashfn.coeff_A) - old_coeff_A = hashfn.coeff_A - new_coeff_A = similar(old_coeff_A, n_hashes, n) - - new_coeff_A[1:end, 1:min(n,old_n)] .= old_coeff_A - - if n > old_n - new_coeff_slice = @views new_coeff_A[1:end,old_n+1:end] - @views map!(x -> randn(T), new_coeff_slice, new_coeff_slice) - end - - hashfn.coeff_A = new_coeff_A -end - -current_max_input_size(hashfn::MIPSHash) = size(hashfn.coeff_A, 2) - -#======================== -Function definitions for the two hash functions used by the approximate MIPS LSH, -h(P(x)) and h(Q(x)) (where h is an L^2 LSH function). -========================# - -# Helper functions -mat(x::AbstractVector) = reshape(x, length(x), 1) -mat(x::AbstractMatrix) = x - -#========= -h(P(x)) definitions -=========# - -@generated function MIPSHash_P( - hashfn::MIPSHash{T}, - x::AbstractArray{S}) where {T,S} - - if T != S - # Perform type conversion to hit BLAS - :( MIPSHash_P(hashfn, T.(x)) ) - elseif x <: AbstractVector - :( _MIPSHash_P(hashfn, x) |> vec ) - else - :( _MIPSHash_P(hashfn, x) ) - end -end - -function _MIPSHash_P(hashfn::MIPSHash{T}, x::AbstractArray) where {T} - n = size(x,1) - if n > current_max_input_size(hashfn) - resize!(hashfn, size(x,1)) - end - - norms = col_norms(x) - for norm_ii in norms - if norm_ii > hashfn.maxnorm - "norm $(norm_ii) exceeds maxnorm ($(hashfn.maxnorm))" |> - ErrorException |> - throw - end - end - BLAS.scal!(length(norms), 1/hashfn.maxnorm, norms, 1) - - # First, perform a matvec on x and the first array of coefficients. - # Note: aTx is an n_hashes × n_inputs array - @views aTx = hashfn.coeff_A[1:end,1:n] * x .* (1/hashfn.maxnorm) |> mat - - # Compute norms^2, norms^4, ... norms^(2^m). 
- # Multiply these by the second array of coefficients and add them to aTx, so - # that in totality we compute - # - # aTx = [coeff_A, coeff_B] * P(x) - # = [coeff_A, coeff_B] * [x; norms^2; ...; norms^(2^m)] - # - # By making these computations in a somewhat roundabout way (rather than - # following the formula above), we save a lot of memory by avoiding - # concatenations. - # Note that m is typically small, so these iterations don't do much to harm - # performance - for ii = 1:hashfn.m - norms .^= 2 - MIPSHash_P_update_aTx!(hashfn.coeff_B[:,ii], norms, aTx) - end - - # Compute the remainder of the hash the same way we'd compute an L^p distance - # LSH. - @. aTx = aTx / hashfn.scale + hashfn.shift - - return floor.(Int32, aTx) -end - -MIPSHash_P_update_aTx!(coeff::Vector{T}, norms::Vector{T}, aTx :: Array{T}) where T = - BLAS.ger!(T(1), coeff, norms, aTx) - -MIPSHash_P_update_aTx!(coeff, norms, aTx) = - (aTx .+= coeff' * norms) - -#========== -h(Q(x)) definitions -===========# - -@generated function MIPSHash_Q( - hashfn::MIPSHash{T}, - x::AbstractArray{S}) where {T,S} - - if T != S - # Perform type conversion to hit BLAS - :( MIPSHash_Q(hashfn, T.(x)) ) - elseif x <: AbstractVector - :( _MIPSHash_Q(hashfn, x) |> vec ) - else - :( _MIPSHash_Q(hashfn, x) ) - end -end - -function _MIPSHash_Q(hashfn::MIPSHash{T}, x::AbstractArray) where T - n = size(x,1) - if n > current_max_input_size(hashfn) - resize!(hashfn, n) - end - - # First, perform a matvec on x and the first array of coefficients. - # Note: aTx is an n_hashes × n_inputs array - aTx = @views hashfn.coeff_A[1:end,1:n] * x |> mat - - # Normalize the query vectors. We perform normalization after computing - # aTx (rather than before) so that we don't have to allocate a new array - # of size(x). Moreover, for large input vectors, the size of aTx is typically - # much smaller than the size of x. 
- norms = col_norms(x) - for norm_ii in norms - if norm_ii > hashfn.maxnorm - "norm $(norm_ii) exceeds maxnorm ($(hashfn.maxnorm))" |> - ErrorException |> - throw - end - end - map!(x::T -> x ≈ T(0) ? T(1) : x, norms, norms) - aTx .= aTx ./ norms' - - # Here, we would multiply the second array of coefficients by the elements - # that Q(x) concatenates to x. Then we'd add this to aTx so that in total we - # compute - # - # aTx = [coeff_A, coeff_B] * Q(x) - # = [coeff_A, coeff_B] * [x; 1/2; 1/2; ...; 1/2] - # - # Then we'd proceed with computing the rest of the L^2 distance LSH. However, - # since the values concatenated on by Q(x) are always the same, we actually - # pre-compute coeff_B * [1/2; 1/2; ...; 1/2] + shift when we construct the - # MIPSHash to reduce the number of computations. - @. aTx = aTx / hashfn.scale + hashfn.Qshift - - return floor.(Int32, aTx) -end - -#======================== -LSHFunction and AsymmetricLSHFunction API compliance -========================# -index_hash(hashfn::MIPSHash, x) = MIPSHash_P(hashfn, x) -query_hash(hashfn::MIPSHash, x) = MIPSHash_Q(hashfn, x) -similarity(::MIPSHash) = inner_prod - -n_hashes(hashfn::MIPSHash) = length(hashfn.shift) -hashtype(::MIPSHash) = Int32 diff --git a/test/hashes/test_mips_hash.jl b/test/hashes/test_mips_hash.jl deleted file mode 100644 index cc70d6b..0000000 --- a/test/hashes/test_mips_hash.jl +++ /dev/null @@ -1,244 +0,0 @@ -using Test, Random, LSHFunctions - -include(joinpath("..", "utils.jl")) - -#================== -Tests -==================# -@testset "MIPSHash tests" begin - Random.seed!(RANDOM_SEED) - import LinearAlgebra: norm - import Base.Iterators: product - import SparseArrays: sprandn - - @testset "Can construct a simple MIPSHash" begin - hashfn = MIPSHash(; maxnorm=1) - - @test n_hashes(hashfn) == 1 - @test hashtype(hashfn) == Int32 - @test similarity(hashfn) == inner_prod - @test isa(hashfn, MIPSHash{Float32}) # Default dtype should be Float32 - @test isa(hashfn, 
LSHFunctions.AsymmetricLSHFunction) - - ## - hashfn = MIPSHash(12; maxnorm=1) - - @test n_hashes(hashfn) == 12 - - ## - hashfn = MIPSHash(; dtype=Float64, maxnorm=1) - - @test isa(hashfn, MIPSHash{Float64}) - - ## - hashfn = MIPSHash{Float64}(; maxnorm=1) - @test isa(hashfn, MIPSHash{Float64}) - - ### Invalid hash function construction - # Non-positive number of hash functions - @test_throws ErrorException MIPSHash(-1; maxnorm=1) - @test_throws ErrorException MIPSHash( 0; maxnorm=1) - - # Non-positive m - @test_throws ErrorException MIPSHash(; m = -1, maxnorm=1) - @test_throws ErrorException MIPSHash(; m = 0, maxnorm=1) - - # Non-positive scale factor - @test_throws ErrorException MIPSHash(; scale = -1, maxnorm=1) - @test_throws ErrorException MIPSHash(; scale = 0, maxnorm=1) - - # maxnorm not specified or non-positive - @test_throws ErrorException MIPSHash() - @test_throws ErrorException MIPSHash(; maxnorm=-1) - @test_throws ErrorException MIPSHash(; maxnorm=0) - end - - @testset "Hashing returns the correct data types" begin - hashfn = MIPSHash{Float64}(; maxnorm=20, scale=1, m=3) - - # Matrix{Float64} -> Matrix{Int32} - x = randn(4, 10) - p_hashes = index_hash(hashfn, x) - q_hashes = query_hash(hashfn, x) - - @test isa(p_hashes, Matrix{Int32}) - @test isa(q_hashes, Matrix{Int32}) - @test hashtype(hashfn) == eltype(p_hashes) - @test hashtype(hashfn) == eltype(q_hashes) - - # Vector{Float64} -> Vector{Int32} - x = randn(4) - p_hashes = index_hash(hashfn, x) - q_hashes = query_hash(hashfn, x) - - @test isa(index_hash(hashfn, x), Vector{Int32}) - @test isa(query_hash(hashfn, x), Vector{Int32}) - end - - @testset "MIPSHash h(P(x)) is correctly computed" begin - n_hashes = 128 - scale = 0.5 - m = 3 - x = randn(20) - maxnorm = 2*norm(x) - - hashfn = MIPSHash(n_hashes; maxnorm=maxnorm, scale=scale, m=m) - - @test size(hashfn.coeff_B) == (n_hashes, 3) - @test size(hashfn.shift) == (n_hashes,) - - hash = index_hash(hashfn, x) - - @test isa(hash, Vector{Int32}) - @test 
length(hash) == n_hashes - - # Since resize_pow2 was not specified, and x is the largest input seen so - # far, the number of coefficients / hash should be equal to 20 - - @test size(hashfn.coeff_A) == (n_hashes, length(x)) - - ### Compute hash manually - # Start by performing the transform P(x) - coeff = [hashfn.coeff_A hashfn.coeff_B] - u = x / maxnorm - norm_powers = [norm(u)^2, norm(u)^4, norm(u)^8] - Px = [u; norm_powers] - - # Now compute the L^2 hash of P(x) - manual_hash = coeff * Px ./ scale .+ hashfn.shift - manual_hash = floor.(Int32, manual_hash) - - @test manual_hash == hash - end - - @testset "MIPSHash h(Q(x)) is correctly computed" begin - n_hashes = 128 - scale = 0.5 - m = 3 - x = randn(20) - maxnorm = 2*norm(x) - - hashfn = MIPSHash(n_hashes; maxnorm=maxnorm, scale=scale, m=m) - - @test size(hashfn.coeff_B) == (n_hashes, m) - @test size(hashfn.shift) == (n_hashes,) - - hash = query_hash(hashfn, x) - - @test isa(hash, Vector{Int32}) - @test length(hash) == n_hashes - - # Since x is the largest input we've seen so far and resize_pow2 not - # specified, the number of coefficients per hash in hashfn.coeff_A - # should be equal to the length of x. 
- - @test size(hashfn.coeff_A) == (n_hashes, length(x)) - - ### Compute hash manually - u = x ./ norm(x) - Qx = [u; 1/2; 1/2; 1/2] - coeff = [hashfn.coeff_A hashfn.coeff_B] - - @test size(Qx) == (length(x)+3,) - - # Then, we compute the L^2 hash of Qx - manual_hash = coeff * Qx ./ scale .+ hashfn.shift - manual_hash = floor.(Int32, manual_hash) - - @test manual_hash == hash - end - - @testset "Hash inputs of different sizes" begin - n_hashes = 16 - hashfn = MIPSHash(n_hashes; maxnorm=1000) - - index_hash(hashfn, rand(10)) - @test size(hashfn.coeff_A) == (n_hashes, 10) - - index_hash(hashfn, rand(14)) - @test size(hashfn.coeff_A) == (n_hashes, 14) - - index_hash(hashfn, rand(8)) - @test size(hashfn.coeff_A) == (n_hashes, 14) - - query_hash(hashfn, rand(10)) - @test size(hashfn.coeff_A) == (n_hashes, 14) - - query_hash(hashfn, rand(20)) - @test size(hashfn.coeff_A) == (n_hashes, 20) - - query_hash(hashfn, rand(100)) - @test size(hashfn.coeff_A) == (n_hashes, 100) - end - - @testset "resize_pow2 increases number of coefficients to powers of 2" begin - hashfn = MIPSHash(10; maxnorm=1000, resize_pow2=true) - @test size(hashfn.coeff_A) == (10, 0) - - index_hash(hashfn, rand(3)) - @test size(hashfn.coeff_A) == (10, 4) - - query_hash(hashfn, rand(2)) - @test size(hashfn.coeff_A) == (10, 4) - - query_hash(hashfn, rand(5)) - @test size(hashfn.coeff_A) == (10, 8) - - index_hash(hashfn, rand(7)) - @test size(hashfn.coeff_A) == (10, 8) - end - - @testset "MIPSHash generates collisions for large inner products" begin - input_length = 5; n_hashes = 128; - - # Compare a random vector x against four other vectors: - # a) 10 * x - # b) x - # c) A vector of all zeros - # d) -x - x = randn(input_length) - x2, x3, x4 = 10*x, zero(x), -x - - maxnorm = (x, x2, x3, x4) .|> norm |> maximum - hashfn = MIPSHash(n_hashes; maxnorm=maxnorm) - - x_query_hashes = query_hash(hashfn, x) - - dataset = [x2 x x3 x4] - p_hashes = index_hash(hashfn, dataset) - - # Each collection of hashes should be 
different from one another - @test let result = true - for (ii,jj) in Iterators.product(1:4, 1:4) - if ii != jj && p_hashes[:,ii] == p_hashes[:,jj] - result = false - break - end - end - result - end - end - - @testset "Can compute hashes for sparse arrays" begin - X = sprandn(Float32, 10, 1000, 0.2) - maxnorm = X |> eachcol .|> norm |> maximum - hashfn = MIPSHash(8; maxnorm=maxnorm, scale=1, m=1) - - ihashes = index_hash(hashfn, X) - qhashes = query_hash(hashfn, X) - - # Compare against the case where X is dense - X = Matrix(X) - ihashes_dense = index_hash(hashfn, X) - qhashes_dense = query_hash(hashfn, X) - - @test ihashes == ihashes_dense - @test qhashes == qhashes_dense - end - - @testset "MIPSHash throws an error when input norm exceeds maxnorm" begin - hashfn = MIPSHash(; maxnorm=1) - @test_throws ErrorException index_hash(hashfn, ones(4)) - @test_throws ErrorException query_hash(hashfn, ones(4)) - end -end diff --git a/test/runtests.jl b/test/runtests.jl index edfd793..81474ae 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,7 +22,6 @@ include("test_similarities.jl") include(joinpath("hashes", "test_simhash.jl")) include(joinpath("hashes", "test_minhash.jl")) include(joinpath("hashes", "test_lphash.jl")) -include(joinpath("hashes", "test_mips_hash.jl")) include(joinpath("hashes", "test_sign_alsh.jl")) include(joinpath("hashes", "test_lshfunction.jl")) From d0f4b53722b2d377f7c9d66b42af31a74c2027a9 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 14 Aug 2020 19:08:03 -0600 Subject: [PATCH 03/15] Use Julia 1.5 for code coverage and for generating documentation. 
--- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6f88882..5546cc5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,15 +30,15 @@ jobs: - stage: julia: 1.4 + + - stage: + julia: 1.5 after_success: # Code coverage - julia -e 'using Pkg; Pkg.add("Coverage");' - julia -e 'using Coverage; Coveralls.submit(Coveralls.process_folder());' - julia -e 'using Coverage; Codecov.submit(Codecov.process_folder());' - - stage: - julia: 1.5 - - stage: julia: nightly @@ -47,7 +47,7 @@ jobs: ######################################################## - stage: "Documentation" - julia: 1.4 + julia: 1.5 install: - sudo apt-get update - sudo apt-get install -y python3.7 python3-pip python3-setuptools From 7f22a506e367c184563529df67a08f03ab463d07 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 8 Oct 2020 00:36:30 +0000 Subject: [PATCH 04/15] CompatHelper: bump compat for "Distributions" to "0.24" --- Manifest.toml | 97 +++++++++++++++++++++++----------------------- Project.toml | 2 +- docs/Manifest.toml | 47 +++++++++++++--------- 3 files changed, 78 insertions(+), 68 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 2365ae3..4b0339c 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,20 +1,14 @@ # This file is machine-generated - editing it directly is not advised -[[Arpack]] -deps = ["Arpack_jll", "Libdl", "LinearAlgebra"] -git-tree-sha1 = "2ff92b71ba1747c5fdd541f8fc87736d82f40ec9" -uuid = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" -version = "0.4.0" - -[[Arpack_jll]] -deps = ["Libdl", "OpenBLAS_jll", "Pkg"] -git-tree-sha1 = "e214a9b9bd1b4e1b4f15b22c0994862b66af7ff7" -uuid = "68821587-b530-5797-8361-c406ea357684" -version = "3.5.0+3" - [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", 
"Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "8cd7b7d1c7f6fcbe7e8743a58adf57788ec7f787" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.18.0" + [[CompilerSupportLibraries_jll]] deps = ["Libdl", "Pkg"] git-tree-sha1 = "7c4f882c41faa72118841185afc58a2eb00ef612" @@ -27,42 +21,46 @@ uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" version = "1.3.0" [[DataStructures]] -deps = ["InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "edad9434967fdc0a2631a65d902228400642120c" +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "db07bb22795762895b60e44d62b34b16c982a687" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.17.19" +version = "0.18.7" [[Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + [[Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[Distributions]] -deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"] -git-tree-sha1 = "78c4c32a2357a00a0a7d614880f02c2c6e1ec73c" +deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "StaticArrays", "Statistics", "StatsBase", "StatsFuns"] +git-tree-sha1 = "164a5b8d81743dbb9b60d6e45b4e9c0f3b8a6caf" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.23.4" +version = "0.24.0" [[DocStringExtensions]] deps = ["LibGit2", "Markdown", "Pkg", "Test"] -git-tree-sha1 = "c5714d9bcdba66389612dc4c47ed827c64112997" +git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.2" +version = "0.8.3" [[Documenter]] deps = ["Base64", "Dates", "DocStringExtensions", "InteractiveUtils", "JSON", "LibGit2", 
"Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "f3464968c65fc78846dad1c038c474a2c39bbb23" +git-tree-sha1 = "fb1ff838470573adc15c71ba79f8d31328f035da" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.25.0" +version = "0.25.2" [[FillArrays]] deps = ["LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "bf726ba7ce99e00d10bf63c031285fb9ab3676ae" +git-tree-sha1 = "b955c227b0d1413a1a97e2ca0635a5de019d7337" uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.8.11" +version = "0.9.6" [[InteractiveUtils]] deps = ["Markdown"] @@ -70,12 +68,11 @@ uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e" +git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.0" +version = "0.21.1" [[LibGit2]] -deps = ["Printf"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[Libdl]] @@ -94,19 +91,13 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[Missings]] deps = ["DataAPI"] -git-tree-sha1 = "de0a5ce9e5289f27df672ffabef4d1e5861247d5" +git-tree-sha1 = "ed61674a0864832495ffe0a7e889c0da76b0f4c8" uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "0.4.3" +version = "0.4.4" [[Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" -[[OpenBLAS_jll]] -deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] -git-tree-sha1 = "1887096f6897306a4662f7c5af936da7d5d1a062" -uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.9+4" - [[OpenSpecFun_jll]] deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] git-tree-sha1 = "d51c416559217d974a1113522d5919235ae67a87" @@ -114,24 +105,24 @@ uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" version = "0.5.3+3" [[OrderedCollections]] -git-tree-sha1 = "12ce190210d278e12644bcadf5b21cbdcf225cd3" +git-tree-sha1 = "16c08bf5dba06609fe45e30860092d6fa41fde7b" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.2.0" +version = "1.3.1" 
[[PDMats]] -deps = ["Arpack", "LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"] -git-tree-sha1 = "2fc6f50ddd959e462f0a2dbc802ddf2a539c6e35" +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"] +git-tree-sha1 = "95a4038d1011dfdbde7cecd2ad0ac411e53ab1bc" uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" -version = "0.9.12" +version = "0.10.1" [[Parsers]] -deps = ["Dates", "Test"] -git-tree-sha1 = "20ef902ea02f7000756a4bc19f7b9c24867c6211" +deps = ["Dates"] +git-tree-sha1 = "6fa4202675c05ba0f8268a6ddf07606350eda3ce" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.0.6" +version = "1.0.11" [[Pkg]] -deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[Printf]] @@ -140,9 +131,9 @@ uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[QuadGK]] deps = ["DataStructures", "LinearAlgebra"] -git-tree-sha1 = "dc84e810393cfc6294248c9032a9cdacc14a3db4" +git-tree-sha1 = "12fbe86da16df6679be7521dfb39fbc861e1dc7b" uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" -version = "2.3.1" +version = "2.4.1" [[REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets"] @@ -170,6 +161,10 @@ uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" [[Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + [[Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" @@ -189,15 +184,21 @@ git-tree-sha1 = "d8d8b8a9f4119829410ecd706da4cc8594a1e020" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" version = "0.10.3" +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "016d1e1a00fabc556473b07161da3d39726ded35" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "0.12.4" + [[Statistics]] deps = ["LinearAlgebra", "SparseArrays"] 
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [[StatsBase]] deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] -git-tree-sha1 = "a6102b1f364befdb05746f386b67c6b7e3262c45" +git-tree-sha1 = "7bab7d4eb46b225b35179632852b595a3162cb61" uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.33.0" +version = "0.33.2" [[StatsFuns]] deps = ["Rmath", "SpecialFunctions"] diff --git a/Project.toml b/Project.toml index 383fe8a..1af2588 100644 --- a/Project.toml +++ b/Project.toml @@ -16,7 +16,7 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] -Distributions = "0.22, 0.23" +Distributions = "0.22, 0.23, 0.24" Documenter = "0.24, 0.25" QuadGK = "2.3" julia = "1.3, 1.4, 1.5" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index fd8d95d..49f97cb 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -11,15 +11,15 @@ version = "0.5.10" [[ColorTypes]] deps = ["FixedPointNumbers", "Random"] -git-tree-sha1 = "cd19496d8943326b752d1712afd6ab79c7514d28" +git-tree-sha1 = "4bffea7ed1a9f0f3d1a131bbcd4b925548d75288" uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" -version = "0.10.5" +version = "0.10.9" [[Colors]] deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Reexport"] -git-tree-sha1 = "5639e44833cfcf78c6a73fbceb4da75611d312cd" +git-tree-sha1 = "008d6bc68dea6beb6303fdc37188cb557391ebf2" uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" -version = "0.12.3" +version = "0.12.4" [[Conda]] deps = ["JSON", "VersionParsing"] @@ -37,15 +37,15 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[DocStringExtensions]] deps = ["LibGit2", "Markdown", "Pkg", "Test"] -git-tree-sha1 = "c5714d9bcdba66389612dc4c47ed827c64112997" +git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.2" +version = "0.8.3" [[Documenter]] deps = ["Base64", "Dates", "DocStringExtensions", 
"InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "f3464968c65fc78846dad1c038c474a2c39bbb23" +git-tree-sha1 = "fb1ff838470573adc15c71ba79f8d31328f035da" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.25.0" +version = "0.25.2" [[DocumenterTools]] deps = ["Base64", "DocStringExtensions", "Documenter", "FileWatching", "LibGit2", "Sass"] @@ -57,9 +57,10 @@ version = "0.1.7" uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" [[FixedPointNumbers]] -git-tree-sha1 = "8fb797c37a3b7ced4327a05ac4ca0dd6a4f1ba92" +deps = ["Statistics"] +git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" -version = "0.8.1" +version = "0.8.4" [[InteractiveUtils]] deps = ["Markdown"] @@ -67,14 +68,14 @@ uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e" +git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.0" +version = "0.21.1" [[LaTeXStrings]] -git-tree-sha1 = "de44b395389b84fd681394d4e8d39ef14e3a2ea8" +git-tree-sha1 = "c7aebfecb1a60d59c0fe023a68ec947a208b1e6b" uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" -version = "1.1.0" +version = "1.2.0" [[LibGit2]] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" @@ -103,10 +104,10 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[Parsers]] -deps = ["Dates", "Test"] -git-tree-sha1 = "20ef902ea02f7000756a4bc19f7b9c24867c6211" +deps = ["Dates"] +git-tree-sha1 = "6fa4202675c05ba0f8268a6ddf07606350eda3ce" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.0.6" +version = "1.0.11" [[Pkg]] deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"] @@ -118,9 +119,9 @@ uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[PyCall]] deps = ["Conda", "Dates", "Libdl", 
"LinearAlgebra", "MacroTools", "Serialization", "VersionParsing"] -git-tree-sha1 = "3a3fdb9000d35958c9ba2323ca7c4958901f115d" +git-tree-sha1 = "b6dff5fa725eff4f775f472acd86756d6e31fb02" uuid = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" -version = "1.91.4" +version = "1.92.1" [[PyPlot]] deps = ["Colors", "LaTeXStrings", "PyCall", "Sockets", "Test", "VersionParsing"] @@ -157,6 +158,14 @@ uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [[Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + [[Test]] deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From face85de84cb5d78b37362919cf73cf2637fd4c2 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Thu, 12 Nov 2020 21:24:28 -0700 Subject: [PATCH 05/15] Update the TagBot workflow. Update TagBot to be compatible with changes to GitHub actions. 
See the following link for more information: https://discourse.julialang.org/t/ann-required-updates-to-tagbot-yml/49249 --- .github/workflows/TagBot.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index d77d3a0..10556cb 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -1,10 +1,13 @@ name: TagBot on: - schedule: - - cron: 0 * * * * + issue_comment: + types: + - created + workflow_dispatch: jobs: TagBot: runs-on: ubuntu-latest + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' steps: - uses: JuliaRegistries/TagBot@v1 with: From 3b3ab8987b14e0d47606cbe7d4f10540d0bbf37e Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 11 Dec 2020 00:41:56 +0000 Subject: [PATCH 06/15] CompatHelper: bump compat for "Documenter" to "0.26" --- Manifest.toml | 75 ++++++++++++++++++++++++++++------------------ Project.toml | 2 +- docs/Manifest.toml | 28 ++++++++++------- 3 files changed, 64 insertions(+), 41 deletions(-) diff --git a/Manifest.toml b/Manifest.toml index 4b0339c..c64e368 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,30 +1,36 @@ # This file is machine-generated - editing it directly is not advised +[[Artifacts]] +deps = ["Pkg"] +git-tree-sha1 = "c30985d8821e0cd73870b17b0ed0ce6dc44cb744" +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.3.0" + [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "8cd7b7d1c7f6fcbe7e8743a58adf57788ec7f787" +git-tree-sha1 = "a706ff10f1cd8dab94f59fd09c0e657db8e77ff0" uuid = 
"34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.18.0" +version = "3.23.0" [[CompilerSupportLibraries_jll]] -deps = ["Libdl", "Pkg"] -git-tree-sha1 = "7c4f882c41faa72118841185afc58a2eb00ef612" +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8e695f735fca77e9708e795eda62afdb869cbb70" uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "0.3.3+0" +version = "0.3.4+0" [[DataAPI]] -git-tree-sha1 = "176e23402d80e7743fc26c19c681bfb11246af32" +git-tree-sha1 = "ad84f52c0b8f05aa20839484dbaf01690b41ff84" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.3.0" +version = "1.4.0" [[DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "db07bb22795762895b60e44d62b34b16c982a687" +git-tree-sha1 = "fb0aa371da91c1ff9dc7fbed6122d3e411420b9c" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.7" +version = "0.18.8" [[Dates]] deps = ["Printf"] @@ -40,9 +46,9 @@ uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[Distributions]] deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "StaticArrays", "Statistics", "StatsBase", "StatsFuns"] -git-tree-sha1 = "164a5b8d81743dbb9b60d6e45b4e9c0f3b8a6caf" +git-tree-sha1 = "3d85c955190e6133966df58d3088d88234b72d12" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.24.0" +version = "0.24.6" [[DocStringExtensions]] deps = ["LibGit2", "Markdown", "Pkg", "Test"] @@ -51,21 +57,32 @@ uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.8.3" [[Documenter]] -deps = ["Base64", "Dates", "DocStringExtensions", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "fb1ff838470573adc15c71ba79f8d31328f035da" +deps = ["Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] +git-tree-sha1 = "c01a7e8bcf7a6693444a52a0c5ac8b4e9528600e" uuid = 
"e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.25.2" +version = "0.26.0" [[FillArrays]] deps = ["LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "b955c227b0d1413a1a97e2ca0635a5de019d7337" +git-tree-sha1 = "c1cf9e87a5c45f0c05dc31ae95757f706e70865a" uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.9.6" +version = "0.10.1" + +[[IOCapture]] +deps = ["Logging"] +git-tree-sha1 = "377252859f740c217b936cebcd918a44f9b53b59" +uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" +version = "0.1.1" [[InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +[[JLLWrappers]] +git-tree-sha1 = "c70593677bbf2c3ccab4f7500d0f4dacfff7b75c" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.1.3" + [[JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" @@ -99,15 +116,15 @@ version = "0.4.4" uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[OpenSpecFun_jll]] -deps = ["CompilerSupportLibraries_jll", "Libdl", "Pkg"] -git-tree-sha1 = "d51c416559217d974a1113522d5919235ae67a87" +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9db77584158d0ab52307f8c04f8e7c08ca76b5b3" uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" -version = "0.5.3+3" +version = "0.5.3+4" [[OrderedCollections]] -git-tree-sha1 = "16c08bf5dba06609fe45e30860092d6fa41fde7b" +git-tree-sha1 = "cf59cfed2e2c12e8a2ff0a4f1e9b2cd8650da6db" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.3.1" +version = "1.3.2" [[PDMats]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"] @@ -117,9 +134,9 @@ version = "0.10.1" [[Parsers]] deps = ["Dates"] -git-tree-sha1 = "6fa4202675c05ba0f8268a6ddf07606350eda3ce" +git-tree-sha1 = "6370b5b3cf2ce5a3d2b6f7ab2dc10f374e4d7d2b" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.0.11" +version = "1.0.14" [[Pkg]] deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", 
"Test", "UUIDs"] @@ -180,15 +197,15 @@ uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[SpecialFunctions]] deps = ["OpenSpecFun_jll"] -git-tree-sha1 = "d8d8b8a9f4119829410ecd706da4cc8594a1e020" +git-tree-sha1 = "7286f31f27e3335cba31c618ac344a35eceac060" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "0.10.3" +version = "1.1.0" [[StaticArrays]] deps = ["LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "016d1e1a00fabc556473b07161da3d39726ded35" +git-tree-sha1 = "9da72ed50e94dbff92036da395275ed114e04d49" uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "0.12.4" +version = "1.0.1" [[Statistics]] deps = ["LinearAlgebra", "SparseArrays"] @@ -202,9 +219,9 @@ version = "0.33.2" [[StatsFuns]] deps = ["Rmath", "SpecialFunctions"] -git-tree-sha1 = "04a5a8e6ab87966b43f247920eab053fd5fdc925" +git-tree-sha1 = "3b9f665c70712af3264b61c27a7e1d62055dafd1" uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" -version = "0.9.5" +version = "0.9.6" [[SuiteSparse]] deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] diff --git a/Project.toml b/Project.toml index 1af2588..5daff8c 100644 --- a/Project.toml +++ b/Project.toml @@ -17,6 +17,6 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] Distributions = "0.22, 0.23, 0.24" -Documenter = "0.24, 0.25" +Documenter = "0.24, 0.25, 0.26" QuadGK = "2.3" julia = "1.3, 1.4, 1.5" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index 49f97cb..e7a3e07 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -23,9 +23,9 @@ version = "0.12.4" [[Conda]] deps = ["JSON", "VersionParsing"] -git-tree-sha1 = "7a58bb32ce5d85f8bf7559aa7c2842f9aecf52fc" +git-tree-sha1 = "c0647249d785f1d5139c0cc96db8f6b32f7ec416" uuid = "8f4d0f93-b110-5947-807f-2305c1781a2d" -version = "1.4.1" +version = "1.5.0" [[Dates]] deps = ["Printf"] @@ -42,16 +42,16 @@ uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.8.3" [[Documenter]] -deps = ["Base64", "Dates", "DocStringExtensions", "InteractiveUtils", "JSON", "LibGit2", 
"Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "fb1ff838470573adc15c71ba79f8d31328f035da" +deps = ["Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] +git-tree-sha1 = "c01a7e8bcf7a6693444a52a0c5ac8b4e9528600e" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.25.2" +version = "0.26.0" [[DocumenterTools]] deps = ["Base64", "DocStringExtensions", "Documenter", "FileWatching", "LibGit2", "Sass"] -git-tree-sha1 = "6fa30234228d9020cbe31e393e9d183e944845bb" +git-tree-sha1 = "9b40fd93f54ba5ef9d364981124a8ed389fd634e" uuid = "35a29f4d-8980-5a13-9543-d66fff28ecb8" -version = "0.1.7" +version = "0.1.9" [[FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" @@ -62,6 +62,12 @@ git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" version = "0.8.4" +[[IOCapture]] +deps = ["Logging"] +git-tree-sha1 = "377252859f740c217b936cebcd918a44f9b53b59" +uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" +version = "0.1.1" + [[InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" @@ -92,9 +98,9 @@ uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[MacroTools]] deps = ["Markdown", "Random"] -git-tree-sha1 = "f7d2e3f654af75f01ec49be82c231c382214223a" +git-tree-sha1 = "6a8a2a625ab0dea913aba95c11370589e0239ff0" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" -version = "0.5.5" +version = "0.5.6" [[Markdown]] deps = ["Base64"] @@ -105,9 +111,9 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[Parsers]] deps = ["Dates"] -git-tree-sha1 = "6fa4202675c05ba0f8268a6ddf07606350eda3ce" +git-tree-sha1 = "6370b5b3cf2ce5a3d2b6f7ab2dc10f374e4d7d2b" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.0.11" +version = "1.0.14" [[Pkg]] deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"] From 016f995abcd003e1b00dabb1023d44eb829f4097 Mon Sep 17 
00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 26 Feb 2021 00:10:34 -0700 Subject: [PATCH 07/15] Improved implementation of Base.show for LSHFunction. Closes #32. Add a new implementation of Base.show(::IO, ::LSHFunction) that prints an arbitrary LSHFunction. --- src/LSHBase.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/LSHBase.jl b/src/LSHBase.jl index dbf312b..62ee078 100644 --- a/src/LSHBase.jl +++ b/src/LSHBase.jl @@ -41,6 +41,14 @@ See also: [`SymmetricLSHFunction`](@ref) """ abstract type AsymmetricLSHFunction <: LSHFunction end +function Base.show(io::IO, hashfn::LSHFunction) + args = "n_hashes = $(n_hashes(hashfn))" + args = args * ", similarity = $(similarity(hashfn))" + args = args * ", hashtype = $(hashtype(hashfn))" + msg = "$(typeof(hashfn))($args)" + print(io, msg) +end + #======================== Similarity function API ========================# From dba6dcffa2451a603dd04f7b35c597d61ee08af3 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 26 Feb 2021 00:22:54 -0700 Subject: [PATCH 08/15] Update the CI/CD pipeline to use GitHub actions rather than Travis CI. Small reformatting updates to the README. 
--- .github/workflows/CI.yml | 66 ++++++++++++++++++++++++++++++ .github/workflows/CompatHelper.yml | 9 ++-- .github/workflows/TagBot.yml | 3 +- .travis.yml | 63 ---------------------------- README.md | 54 ++++++++++++++++-------- 5 files changed, 109 insertions(+), 86 deletions(-) create mode 100644 .github/workflows/CI.yml delete mode 100644 .travis.yml diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..fec9d54 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,66 @@ +name: CI +on: + - push + - pull_request +jobs: + test: + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1' + - '1.0' + - 'nightly' + os: + - ubuntu-latest + - macOS-latest + - windows-latest + arch: + - x64 + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - uses: actions/cache@v1 + env: + cache-name: cache-artifacts + with: + path: ~/.julia/artifacts + key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} + restore-keys: | + ${{ runner.os }}-test-${{ env.cache-name }}- + ${{ runner.os }}-test- + ${{ runner.os }}- + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v1 + with: + file: lcov.info + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: '1' + - run: | + julia --project=docs -e ' + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate()' + - run: | + julia --project=docs -e ' + using Documenter: DocMeta, doctest + using LSHFunctions + DocMeta.setdocmeta!(LSHFunctions, :DocTestSetup, :(using LSHFunctions); recursive=true) + doctest(LSHFunctions)' + - run: 
julia --project=docs docs/make.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index dd821e6..cba9134 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -1,19 +1,16 @@ name: CompatHelper - on: schedule: - - cron: '00 00 * * *' - + - cron: 0 0 * * * + workflow_dispatch: jobs: CompatHelper: runs-on: ubuntu-latest steps: - - uses: julia-actions/setup-julia@latest - with: - version: 1.3 - name: Pkg.add("CompatHelper") run: julia -e 'using Pkg; Pkg.add("CompatHelper")' - name: CompatHelper.main() env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index 10556cb..f49313b 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -6,9 +6,10 @@ on: workflow_dispatch: jobs: TagBot: - runs-on: ubuntu-latest if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest steps: - uses: JuliaRegistries/TagBot@v1 with: token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 5546cc5..0000000 --- a/.travis.yml +++ /dev/null @@ -1,63 +0,0 @@ -language: julia - -# Cache artifacts between tests: -# -# http://discourse.julialang.org/t/recommendation-cache-julia-artifacts-in-ci-services/ -# -cache: - directories: - - $HOME/.julia/artifacts - -os: linux -dist: bionic - -notifications: - email: false - -jobs: - - allow_failures: - - julia: nightly - - include: - - ######################################################## - ### Unit testing - ######################################################## - - - stage: "Unit testing" - julia: 1.3 - - - stage: - julia: 1.4 - - - stage: - julia: 1.5 - 
after_success: - # Code coverage - - julia -e 'using Pkg; Pkg.add("Coverage");' - - julia -e 'using Coverage; Coveralls.submit(Coveralls.process_folder());' - - julia -e 'using Coverage; Codecov.submit(Codecov.process_folder());' - - - stage: - julia: nightly - - ######################################################## - ### Documentation generation - ######################################################## - - - stage: "Documentation" - julia: 1.5 - install: - - sudo apt-get update - - sudo apt-get install -y python3.7 python3-pip python3-setuptools - - pip3 install --upgrade pip - - pip3 install --user matplotlib - - julia --project=docs -e 'using Pkg; Pkg.instantiate(); Pkg.add(PackageSpec(path=pwd()));' - - julia --project=docs -e 'using Pkg; Pkg.build("PyCall");' - script: - - julia --project=docs --color=yes docs/make.jl - after_success: skip - -script: - - julia --color=yes -e 'using Pkg; Pkg.activate(); Pkg.instantiate(); Pkg.test(coverage=true)' diff --git a/README.md b/README.md index e4005db..67da888 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ # LSHFunctions.jl -- Docs: [![Stable docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://kernelmethod.github.io/LSHFunctions.jl/stable/) [![Dev docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://kernelmethod.github.io/LSHFunctions.jl/dev/) -- Build status: [![Build Status](https://travis-ci.com/kernelmethod/LSHFunctions.jl.svg?branch=master)](https://travis-ci.com/kernelmethod/LSHFunctions.jl) -- Code coverage: [![Coverage Status](https://coveralls.io/repos/github/kernelmethod/LSHFunctions.jl/badge.svg?branch=master)](https://coveralls.io/github/kernelmethod/LSHFunctions.jl?branch=master) -[![codecov](https://codecov.io/gh/kernelmethod/LSHFunctions.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/kernelmethod/LSHFunctions.jl) -- DOI to cite this code: [![DOI](https://zenodo.org/badge/197700982.svg)](https://zenodo.org/badge/latestdoi/197700982) +[![Stable 
docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://kernelmethod.github.io/LSHFunctions.jl/stable/) [![Dev docs](https://img.shields.io/badge/docs-dev-blue.svg)](https://kernelmethod.github.io/LSHFunctions.jl/dev/) +[![Build Status](https://github.com/kernelmethod/LSHFunctions.jl/workflows/CI/badge.svg)](https://github.com/kernelmethod/LSHFunctions.jl/actions) +[![Codecov](https://codecov.io/gh/kernelmethod/LSHFunctions.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/kernelmethod/LSHFunctions.jl) +[![DOI](https://zenodo.org/badge/197700982.svg)](https://zenodo.org/badge/latestdoi/197700982) -A Julia package for [locality-sensitive hashing](https://en.wikipedia.org/wiki/Locality-sensitive_hashing) to accelerate similarity search. +A Julia package for [locality-sensitive +hashing](https://en.wikipedia.org/wiki/Locality-sensitive_hashing) to accelerate +similarity search. - [What's LSH?](#whats-lsh) - [Installation](#installation) @@ -14,12 +15,29 @@ A Julia package for [locality-sensitive hashing](https://en.wikipedia.org/wiki/L - [Examples](#examples) ## What's LSH? -Traditionally, if you have a data point `x`, and want to find the most similar point(s) to `x` in your database, you would compute the similarity between `x` and all of the points in your database, and keep whichever points were the most similar. For instance, this type of approach is used by the classic [k-nearest neighbors algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm). However, it has two major problems: - -- The time to find the most similar point(s) to `x` is linear in the number of points in your database. This can make similarity search prohibitively expensive for even moderately large datasets. -- In addition, the time complexity to compute the similarity between two datapoints is typically linear in the number of dimensions of those datapoints. If your data are high-dimensional (i.e. 
in the thousands to millions of dimensions), every similarity computation you perform can be fairly costly. - -**Locality-sensitive hashing** (LSH) is a technique for accelerating these kinds of similarity searches. Instead of measuring how similar your query point is to every point in your database, you calculate a few hashes of the query point and only compare it against those points with which it experiences a hash collision. Locality-sensitive hash functions are randomly generated, with the fundamental property that as the similarity between `x` and `y` increases, the probability of a hash collision between `x` and `y` also increases. +Traditionally, if you have a data point `x`, and want to find the most similar +point(s) to `x` in your database, you would compute the similarity between `x` +and all of the points in your database, and keep whichever points were the most +similar. For instance, this type of approach is used by the classic [k-nearest +neighbors algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm). +However, it has two major problems: + +- The time to find the most similar point(s) to `x` is linear in the number of + points in your database. This can make similarity search prohibitively + expensive for even moderately large datasets. +- In addition, the time complexity to compute the similarity between two + datapoints is typically linear in the number of dimensions of those + datapoints. If your data are high-dimensional (i.e. in the thousands to + millions of dimensions), every similarity computation you perform can be + fairly costly. + +**Locality-sensitive hashing** (LSH) is a technique for accelerating these kinds +of similarity searches. Instead of measuring how similar your query point is to +every point in your database, you calculate a few hashes of the query point and +only compare it against those points with which it experiences a hash collision. 
+Locality-sensitive hash functions are randomly generated, with the fundamental +property that as the similarity between `x` and `y` increases, the probability +of a hash collision between `x` and `y` also increases. ## Installation @@ -43,10 +61,12 @@ So far, there are hash functions for the similarity functions: - `MonteCarloHash` - `ChebHash` -This package still needs a lot of work, including improvement to the documentation and API. +This package still needs a lot of work, including improvement to the +documentation and API. ## Examples -The easiest way to start constructing new hash functions is by calling `LSHFunction` with the following syntax: +The easiest way to start constructing new hash functions is by calling +`LSHFunction` with the following syntax: ``` hashfn = LSHFunction(similarity function, @@ -54,7 +74,8 @@ hashfn = LSHFunction(similarity function, [LSH family-specific keyword arguments]) ``` -For example, the following snippet generates 10 locality-sensitive hash functions (bundled together into a single `SimHash` struct) for cosine similarity: +For example, the following snippet generates 10 locality-sensitive hash +functions (bundled together into a single `SimHash` ) for cosine similarity: ```julia julia> using LSHFunctions; @@ -76,4 +97,5 @@ julia> x = randn(128); julia> x_hashes = hashfn(x); ``` -For more details, [check out the LSHFunctions.jl documentation](https://kernelmethod.github.io/LSHFunctions.jl/dev/). +For more details, [check out the LSHFunctions.jl +documentation](https://kernelmethod.github.io/LSHFunctions.jl/dev/). From fdf649537173c62e1b0f133c695ec7cbcbb69a02 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 26 Feb 2021 00:24:20 -0700 Subject: [PATCH 09/15] Don't use Julia 1.0 when running tests on LSHFunctions.jl. 
--- .github/workflows/CI.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index fec9d54..e02ca0e 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -11,7 +11,6 @@ jobs: matrix: version: - '1' - - '1.0' - 'nightly' os: - ubuntu-latest From a1f732ddb25a69118b4142a416563d650a91728a Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 26 Feb 2021 00:43:17 -0700 Subject: [PATCH 10/15] Attempt to fix the documentation build step of the CI workflow. --- .github/workflows/CI.yml | 5 +++ Manifest.toml | 67 ++++++++++++++++++++-------------------- Project.toml | 11 +++++-- docs/Project.toml | 4 +++ 4 files changed, 51 insertions(+), 36 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index e02ca0e..305ae9c 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -48,6 +48,11 @@ jobs: - uses: julia-actions/setup-julia@v1 with: version: '1' + - run: | + sudo apt-get update + sudo apt-get install -y python3.7 python3-pip python3-setuptools + pip3 install --upgrade pip + pip3 install --user matplotlib - run: | julia --project=docs -e ' using Pkg diff --git a/Manifest.toml b/Manifest.toml index c64e368..372497e 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -9,11 +9,17 @@ version = "1.3.0" [[Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +[[ChainRulesCore]] +deps = ["Compat", "LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "de4f08843c332d355852721adb1592bce7924da3" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "0.9.29" + [[Compat]] deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] -git-tree-sha1 = "a706ff10f1cd8dab94f59fd09c0e657db8e77ff0" +git-tree-sha1 = 
"919c7f3151e79ff196add81d7f4e45d91bbf420b" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" -version = "3.23.0" +version = "3.25.0" [[CompilerSupportLibraries_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -22,15 +28,15 @@ uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" version = "0.3.4+0" [[DataAPI]] -git-tree-sha1 = "ad84f52c0b8f05aa20839484dbaf01690b41ff84" +git-tree-sha1 = "dfb3b7e89e395be1e25c2ad6d7690dc29cc53b1d" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" -version = "1.4.0" +version = "1.6.0" [[DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] -git-tree-sha1 = "fb0aa371da91c1ff9dc7fbed6122d3e411420b9c" +git-tree-sha1 = "4437b64df1e0adccc3e5d1adbc3ac741095e4677" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -version = "0.18.8" +version = "0.18.9" [[Dates]] deps = ["Printf"] @@ -45,10 +51,10 @@ deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[Distributions]] -deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "StaticArrays", "Statistics", "StatsBase", "StatsFuns"] -git-tree-sha1 = "3d85c955190e6133966df58d3088d88234b72d12" +deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SparseArrays", "SpecialFunctions", "Statistics", "StatsBase", "StatsFuns"] +git-tree-sha1 = "0fc424e725eaec6ea3e9fa8df773bee18a1ab503" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" -version = "0.24.6" +version = "0.24.14" [[DocStringExtensions]] deps = ["LibGit2", "Markdown", "Pkg", "Test"] @@ -58,15 +64,15 @@ version = "0.8.3" [[Documenter]] deps = ["Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "c01a7e8bcf7a6693444a52a0c5ac8b4e9528600e" +git-tree-sha1 = "21fb992ef1b28ff8f315354d3808ebf4a8fa6e45" uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.26.0" +version = "0.26.2" [[FillArrays]] deps = 
["LinearAlgebra", "Random", "SparseArrays"] -git-tree-sha1 = "c1cf9e87a5c45f0c05dc31ae95757f706e70865a" +git-tree-sha1 = "4705cc4e212c3c978c60b1b18118ec49b4d731fd" uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" -version = "0.10.1" +version = "0.11.5" [[IOCapture]] deps = ["Logging"] @@ -79,9 +85,9 @@ deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[JLLWrappers]] -git-tree-sha1 = "c70593677bbf2c3ccab4f7500d0f4dacfff7b75c" +git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" -version = "1.1.3" +version = "1.2.0" [[JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] @@ -90,6 +96,7 @@ uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" version = "0.21.1" [[LibGit2]] +deps = ["Printf"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[Libdl]] @@ -108,9 +115,9 @@ uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[Missings]] deps = ["DataAPI"] -git-tree-sha1 = "ed61674a0864832495ffe0a7e889c0da76b0f4c8" +git-tree-sha1 = "f8c673ccc215eb50fcadb285f522420e29e69e1c" uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" -version = "0.4.4" +version = "0.4.5" [[Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" @@ -122,9 +129,9 @@ uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" version = "0.5.3+4" [[OrderedCollections]] -git-tree-sha1 = "cf59cfed2e2c12e8a2ff0a4f1e9b2cd8650da6db" +git-tree-sha1 = "4fa2ba51070ec13fcc7517db714445b4ab986bdf" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" -version = "1.3.2" +version = "1.4.0" [[PDMats]] deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"] @@ -134,12 +141,12 @@ version = "0.10.1" [[Parsers]] deps = ["Dates"] -git-tree-sha1 = "6370b5b3cf2ce5a3d2b6f7ab2dc10f374e4d7d2b" +git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.0.14" +version = "1.0.15" [[Pkg]] -deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"] +deps = ["Dates", "LibGit2", "Libdl", 
"Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" [[Printf]] @@ -196,16 +203,10 @@ deps = ["LinearAlgebra", "Random"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[SpecialFunctions]] -deps = ["OpenSpecFun_jll"] -git-tree-sha1 = "7286f31f27e3335cba31c618ac344a35eceac060" +deps = ["ChainRulesCore", "OpenSpecFun_jll"] +git-tree-sha1 = "5919936c0e92cff40e57d0ddf0ceb667d42e5902" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" -version = "1.1.0" - -[[StaticArrays]] -deps = ["LinearAlgebra", "Random", "Statistics"] -git-tree-sha1 = "9da72ed50e94dbff92036da395275ed114e04d49" -uuid = "90137ffa-7385-5640-81b9-e52037218182" -version = "1.0.1" +version = "1.3.0" [[Statistics]] deps = ["LinearAlgebra", "SparseArrays"] @@ -213,9 +214,9 @@ uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [[StatsBase]] deps = ["DataAPI", "DataStructures", "LinearAlgebra", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics"] -git-tree-sha1 = "7bab7d4eb46b225b35179632852b595a3162cb61" +git-tree-sha1 = "400aa43f7de43aeccc5b2e39a76a79d262202b76" uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" -version = "0.33.2" +version = "0.33.3" [[StatsFuns]] deps = ["Rmath", "SpecialFunctions"] diff --git a/Project.toml b/Project.toml index 5daff8c..b1a44fd 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "LSHFunctions" uuid = "5134c85a-a9db-11e9-340f-8514dff59a31" -authors = ["Will Shand "] +authors = ["kernelmethod <17100608+kernelmethod@users.noreply.github.com>"] version = "0.1.2" [deps] @@ -13,10 +13,15 @@ QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [compat] Distributions = "0.22, 0.23, 0.24" -Documenter = "0.24, 0.25, 0.26" +Documenter = "0.26" QuadGK = "2.3" julia = "1.3, 1.4, 1.5" + +[extras] +Test = 
"8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test"] diff --git a/docs/Project.toml b/docs/Project.toml index 81686e3..0befa71 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -2,3 +2,7 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" PyPlot = "d330b81b-6aea-500a-939a-2ce795aea3ee" + +[compat] +Documenter = "0.26" +PyPlot = "2.9.0" From 51d2e074859df24619d3bd691bc06f4d58e5009f Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 26 Feb 2021 00:48:28 -0700 Subject: [PATCH 11/15] Remove MIPSHash from the API docs. --- docs/src/full_api.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/src/full_api.md b/docs/src/full_api.md index 91c4b5d..be194e8 100644 --- a/docs/src/full_api.md +++ b/docs/src/full_api.md @@ -23,7 +23,6 @@ MinHash L1Hash L2Hash SignALSH -MIPSHash ``` ## Similarity functions From 1d325677bf225e44d2e2712b34ac89a13c596760 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 26 Feb 2021 01:15:15 -0700 Subject: [PATCH 12/15] Update LICENSE --- LICENSE.md => LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename LICENSE.md => LICENSE (96%) diff --git a/LICENSE.md b/LICENSE similarity index 96% rename from LICENSE.md rename to LICENSE index 0c9411f..cc423d3 100644 --- a/LICENSE.md +++ b/LICENSE @@ -1,4 +1,4 @@ -Copyright 2020 William Shand +Copyright 2020-2021 kernelmethod and contributors Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: From 8be0108d188e84f4db81edeeca3650214925c9c2 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 26 Feb 2021 01:39:46 -0700 Subject: [PATCH 13/15] Various updates to prepare for v0.2.0. 
- Remove HashCompressor -- the old implementation was pretty bare and didn't provide much value to the project. It's possible that in a future version something like HashCompressor will be added back in, but I think some additional work is needed to figure out how to make hash compression more generally useful. - Remove some unneeded dependencies from the source and from the Project.toml. - Removed Documenter and Logging. Additionally, I've removed SHA, which was used by HashCompressor but is no longer needed after HashCompressor's removal. - Update the project version to v0.2.0. --- Manifest.toml | 30 ---------- Project.toml | 8 +-- src/LSHFunctions.jl | 1 - src/similarities.jl | 2 +- src/utils/hash_compression.jl | 89 ----------------------------- test/doctests.jl | 9 --- test/runtests.jl | 4 -- test/utils/test_hash_compression.jl | 35 ------------ 8 files changed, 3 insertions(+), 175 deletions(-) delete mode 100644 src/utils/hash_compression.jl delete mode 100644 test/doctests.jl delete mode 100644 test/utils/test_hash_compression.jl diff --git a/Manifest.toml b/Manifest.toml index 372497e..c5e07aa 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -56,30 +56,12 @@ git-tree-sha1 = "0fc424e725eaec6ea3e9fa8df773bee18a1ab503" uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" version = "0.24.14" -[[DocStringExtensions]] -deps = ["LibGit2", "Markdown", "Pkg", "Test"] -git-tree-sha1 = "50ddf44c53698f5e784bbebb3f4b21c5807401b1" -uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" -version = "0.8.3" - -[[Documenter]] -deps = ["Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"] -git-tree-sha1 = "21fb992ef1b28ff8f315354d3808ebf4a8fa6e45" -uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -version = "0.26.2" - [[FillArrays]] deps = ["LinearAlgebra", "Random", "SparseArrays"] git-tree-sha1 = "4705cc4e212c3c978c60b1b18118ec49b4d731fd" uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" version =
"0.11.5" -[[IOCapture]] -deps = ["Logging"] -git-tree-sha1 = "377252859f740c217b936cebcd918a44f9b53b59" -uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" -version = "0.1.1" - [[InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" @@ -89,12 +71,6 @@ git-tree-sha1 = "a431f5f2ca3f4feef3bd7a5e94b8b8d4f2f647a0" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" version = "1.2.0" -[[JSON]] -deps = ["Dates", "Mmap", "Parsers", "Unicode"] -git-tree-sha1 = "81690084b6198a2e1da36fcfda16eeca9f9f24e4" -uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.1" - [[LibGit2]] deps = ["Printf"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" @@ -139,12 +115,6 @@ git-tree-sha1 = "95a4038d1011dfdbde7cecd2ad0ac411e53ab1bc" uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" version = "0.10.1" -[[Parsers]] -deps = ["Dates"] -git-tree-sha1 = "50c9a9ed8c714945e01cd53a21007ed3865ed714" -uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" -version = "1.0.15" - [[Pkg]] deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" diff --git a/Project.toml b/Project.toml index b1a44fd..6415e1f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,24 +1,20 @@ name = "LSHFunctions" uuid = "5134c85a-a9db-11e9-340f-8514dff59a31" authors = ["kernelmethod <17100608+kernelmethod@users.noreply.github.com>"] -version = "0.1.2" +version = "0.2.0" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" -Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" QuadGK = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [compat] Distributions = "0.22, 0.23, 0.24" -Documenter = "0.26" QuadGK = "2.3" -julia = "1.3, 
1.4, 1.5" +julia = "1.5" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/src/LSHFunctions.jl b/src/LSHFunctions.jl index 0c95055..8b67a57 100644 --- a/src/LSHFunctions.jl +++ b/src/LSHFunctions.jl @@ -6,7 +6,6 @@ using Distributions, LinearAlgebra, SparseArrays Common types/utilities used throughout the module ========================# -include(joinpath("utils", "hash_compression.jl")) include(joinpath("utils", "vecops.jl")) include("LSHBase.jl") include("intervals.jl") diff --git a/src/similarities.jl b/src/similarities.jl index cdf002d..48bcb83 100644 --- a/src/similarities.jl +++ b/src/similarities.jl @@ -4,7 +4,7 @@ Definitions of various similarity functions =============================================# -using Markdown, QuadGK +using QuadGK using LinearAlgebra: dot, norm #==================== diff --git a/src/utils/hash_compression.jl b/src/utils/hash_compression.jl deleted file mode 100644 index f5328fb..0000000 --- a/src/utils/hash_compression.jl +++ /dev/null @@ -1,89 +0,0 @@ -#================================================================ - -Utilities for compressing hashes into fixed-size hashes. - -================================================================# - -using SHA - -#======================== -HashCompressor definition and constructors -========================# - -@doc """ - struct HashCompressor - -A compressor for converting variable-width hashes generated by LSHFunctions -into fixed-width hashes. HashCompressor works by taking an array of hashes -generated by an LSHFunction, and using SHA-256 to convert it into a fixed-width -hash. -""" -struct HashCompressor - n_bytes :: Int64 - salt :: Vector{UInt8} -end - -@doc """ - function HashCompressor( - n_bytes :: Integer = 32, - salt :: Union{Vector{UInt8}} = Vector{UInt8}(undef,0) - ) - -Construct a new `HashCompressor`. The created `HashCompressor` will compress hashes -into `n_bytes` bytes, and use the provided salt during hash compression. 
- -# Keyword arguments -- `n_bytes::Integer` (default: `32`): the number of bytes to compress hashes into. -- `salt::Vector{UInt8}` (default: `Vector{UInt8}(undef,0)`: a salt to prepend to hashes before compression using SHA-256. - -# Examples -```jldoctest; setup = :(using LSHFunctions) -julia> compressor = HashCompressor(n_bytes=4); - -julia> compressor([1, 4, 2, 9, 5, 5]) -4-element Array{UInt8,1}: - 0xf3 - 0x91 - 0x55 - 0x2e -``` -""" -function HashCompressor( - ; - n_bytes :: Integer = 32, - salt :: Union{Nothing,Vector{UInt8}} = Vector{UInt8}(undef,0) -) - if !(0 <= n_bytes <= 32) - "n_bytes must satisfy 0 <= n_bytes <= 32" |> - ErrorException |> - throw - end - - HashCompressor(Int64(n_bytes), salt) -end - -#======================== -Compression functions -========================# -(compressor::HashCompressor)(hashes::BitArray{1}) = - reinterpret(UInt8, hashes.chunks) |> compressor - -(compressor::HashCompressor)(hashes::AbstractVector{I}) where {I <: Integer} = - reinterpret(UInt8, hashes) |> compressor - -function (compressor::HashCompressor)(hashes::AbstractVector{UInt8}) - hashes = begin - if length(compressor.salt) == 0 - sha2_256(hashes) - else - sha2_256([compressor.salt; hashes]) - end - end - - if compressor.n_bytes < 32 - hashes[1:compressor.n_bytes] - else - hashes - end -end - diff --git a/test/doctests.jl b/test/doctests.jl deleted file mode 100644 index ec6c366..0000000 --- a/test/doctests.jl +++ /dev/null @@ -1,9 +0,0 @@ -#======================== -Doctests -========================# - -using Documenter, LSHFunctions, Test - -@testset "LSH doctests" begin - doctest(LSHFunctions) -end diff --git a/test/runtests.jl b/test/runtests.jl index 81474ae..17644ae 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -14,8 +14,6 @@ include("utils.jl") Tests ========================# -include("doctests.jl") - include("test_intervals.jl") include("test_similarities.jl") @@ -26,5 +24,3 @@ include(joinpath("hashes", "test_sign_alsh.jl")) 
include(joinpath("hashes", "test_lshfunction.jl")) include(joinpath("function_hashing", "test_monte_carlo.jl")) - -include(joinpath("utils", "test_hash_compression.jl")) diff --git a/test/utils/test_hash_compression.jl b/test/utils/test_hash_compression.jl deleted file mode 100644 index 6ca6e62..0000000 --- a/test/utils/test_hash_compression.jl +++ /dev/null @@ -1,35 +0,0 @@ -using Test, LSHFunctions - -#================== -Tests -==================# -@testset "HashCompressor tests" begin - @testset "Can compress Vector{UInt8} hashes" begin - compressor = HashCompressor(n_bytes=6) - hashes = UInt8[0x01, 0x04, 0x02, 0x08, 0x06, 0x07, 0x08, 0x04] - - @test compressor(hashes) == UInt8[0xce, 0xd8, 0x24, 0x1c, 0xc0, 0x48] - end - - @testset "Can compress Vector{Integer} hashes" begin - compressor = HashCompressor(n_bytes=4) - hashes = [-1, 8, -6, 3, -5, -9, 9, 0] - - @test compressor(hashes) == UInt8[0xb2, 0x7f, 0x8e, 0xb4] - end - - @testset "Can compress BitArray{1} hashes" begin - compressor = HashCompressor(n_bytes=5) - hashes = BitArray([1, 1, 1, 0, 0, 1, 0, 0, 1, 0]) - - @test compressor(hashes) == UInt8[0xa2, 0x99, 0xd7, 0x9f, 0x67] - end - - @testset "Can salt hashes" begin - salt = UInt8[0xcb, 0xe7, 0x12] - compressor = HashCompressor(n_bytes=6, salt=salt) - hashes = [-1, 8, -6, 3, -5, -9, 9, 0] - - @test compressor(hashes) == UInt8[0x9f, 0x5c, 0xf4, 0x3a, 0x29, 0x22] - end -end From b4dbba0794d63fa1075d869366ceb42f868097bb Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 26 Feb 2021 01:57:40 -0700 Subject: [PATCH 14/15] Minor fixes to the CI/CD pipeline. Test on Julia 1.6 instead of nightly (continue testing on nightly only for Linux). 
--- .github/workflows/CI.yml | 9 +++++++-- .github/workflows/CompatHelper.yml | 1 - .github/workflows/TagBot.yml | 1 - 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 305ae9c..01dc60a 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -10,7 +10,8 @@ jobs: fail-fast: false matrix: version: - - '1' + - '1' # Latest release + - '1.6' - 'nightly' os: - ubuntu-latest @@ -18,6 +19,11 @@ jobs: - windows-latest arch: - x64 + exclude: + - os: macOS-latest + version: 'nightly' + - os: windows-latest + version: 'nightly' steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 @@ -67,4 +73,3 @@ jobs: - run: julia --project=docs docs/make.jl env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml index cba9134..f198e90 100644 --- a/.github/workflows/CompatHelper.yml +++ b/.github/workflows/CompatHelper.yml @@ -12,5 +12,4 @@ jobs: - name: CompatHelper.main() env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index f49313b..778c06f 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -12,4 +12,3 @@ jobs: - uses: JuliaRegistries/TagBot@v1 with: token: ${{ secrets.GITHUB_TOKEN }} - ssh: ${{ secrets.DOCUMENTER_KEY }} From a76d5e03a8bde1c6b7b33ed3e32b41036f9660e1 Mon Sep 17 00:00:00 2001 From: kernelmethod <17100608+kernelmethod@users.noreply.github.com> Date: Fri, 26 Feb 2021 02:00:54 -0700 Subject: [PATCH 15/15] Don't run tests on v1.6. 
--- .github/workflows/CI.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 01dc60a..9e2dbb9 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -11,7 +11,6 @@ jobs: matrix: version: - '1' # Latest release - - '1.6' - 'nightly' os: - ubuntu-latest