diff --git a/src/LSHFunctions.jl b/src/LSHFunctions.jl
index 0c95055..b1f33ac 100644
--- a/src/LSHFunctions.jl
+++ b/src/LSHFunctions.jl
@@ -36,7 +36,7 @@ Exports
 ========================#
 
 # Similarity functions, norms, inner products
-export cossim, inner_prod, ℓ1, ℓ2, ℓp, L1, L2, Lp, ℓ1_norm, ℓ2_norm,
+export cossim, hamming, inner_prod, ℓ1, ℓ2, ℓp, L1, L2, Lp, ℓ1_norm, ℓ2_norm,
        ℓp_norm, L1_norm, L2_norm, Lp_norm, jaccard
 
 # Hash functions
diff --git a/src/similarities.jl b/src/similarities.jl
index cdf002d..2e5cac0 100644
--- a/src/similarities.jl
+++ b/src/similarities.jl
@@ -11,6 +11,56 @@ using LinearAlgebra: dot, norm
 Definitions of built-in similarity functions
 ====================#
 
+#====================
+Hamming distance
+====================#
+
+@doc raw"""
+    hamming(x, y)
+
+Compute the Hamming distance between two bit vectors ``x`` and ``y``. The Hamming distance is defined as the number of positions at which ``x`` and ``y`` disagree, i.e.
+
+``\text{Hamming}(x,y) = \sum_i |x_i - y_i|``
+
+# Arguments
+- `x::Union{BitArray{1},Vector{Bool}}`, `y::Union{BitArray{1},Vector{Bool}}`: the two bit vectors between which the Hamming distance is computed. They must have the same length.
+
+# Returns
+- `Int`: the number of indices `i` for which `x[i] != y[i]`.
+
+# Throws
+- `ErrorException`: if `length(x) != length(y)`.
+
+# Examples
+```jldoctest; setup = :(using LSHFunctions)
+julia> x = BitArray([1, 0, 1, 1]);
+
+julia> y = BitArray([0, 0, 1, 0]);
+
+julia> hamming(x,y)
+2
+```
+"""
+function hamming(
+    x::Union{BitArray{1}, Vector{Bool}},
+    y::Union{BitArray{1}, Vector{Bool}}
+)
+    # The loop below runs under @inbounds, so reject mismatched lengths first.
+    if length(x) != length(y)
+        "Lengths of x and y are not equal ($(length(x)) != $(length(y)))" |>
+            ErrorException |>
+            throw
+    end
+
+    total = 0
+    # Each comparison yields a Bool, which adds to the Int counter as 0 or 1.
+    @inbounds @simd for ii in eachindex(x, y)
+        total += (x[ii] != y[ii])
+    end
+
+    return total
+end
+
 #====================
 Cosine similarity
 ====================#
diff --git a/test/test_similarities.jl b/test/test_similarities.jl
index d23fe97..328fb5d 100644
--- a/test/test_similarities.jl
+++ b/test/test_similarities.jl
@@ -7,6 +7,35 @@ include("utils.jl")
 Tests
 ==================#
 
+@testset "Hamming distance tests" begin
+    @testset "Calculate Hamming distances between bit vectors" begin
+        x = BitArray([1, 1, 0, 1, 0, 1, 0, 1])
+        y = BitArray([1, 1, 1, 1, 0, 0, 0, 1])
+        z = BitArray([1, 1, 1, 0, 1, 1, 1, 0])
+
+        @test hamming(x, y) == 2
+        @test hamming(x, z) == 5
+        @test hamming(y, z) == 5
+
+        # The distance is symmetric, and zero between identical vectors
+        @test hamming(y, x) == hamming(x, y)
+        @test hamming(x, x) == 0
+    end
+
+    @testset "Mixed BitArray / Vector{Bool} inputs" begin
+        x = BitArray([1, 0, 1, 1])
+        y = Bool[0, 0, 1, 0]
+
+        @test hamming(x, y) == 2
+        @test hamming(y, x) == 2
+        @test hamming(Bool[], falses(0)) == 0
+    end
+
+    @testset "Mismatched lengths throw an error" begin
+        @test_throws ErrorException hamming(BitArray([1, 0]), BitArray([1, 0, 1]))
+    end
+end
+
 @testset "ℓ^p distance and norm tests" begin
     Random.seed!(RANDOM_SEED)
 