How to speed up simple linear algebra optimization probelm in Julia?

Question

I implemented the LSDD changepoint detection method decribed in [1] in Julia, to see if I could make it faster than the existing python implementation [2], which is based on a grid search that looks for the optimal parameters.

I obtain the desired results but despite my best efforts, my grid search version of it takes about the same time to compute as the python one, which is still way too long for real applications. I also tried using the Optimize package which only makes things worse (2 or 3 times slower).

Here is the grid search that I implemented :

using Random
using LinearAlgebra

"""
computes the square distance netween elements of
X and C. returns squared_dist.
squared_dist[ij] = ||X[:, i] - C[:, j]||^2
"""
function squared_distance(X::Array{Float64,1},C::Array{Float64,1})
    sqd = zeros(length(X),length(C))
    for i in 1:length(X)
        for j in 1:length(C)
            sqd[i,j] = X[i]^2 + C[j]^2 - 2*X[i]*C[j]
        end
    end
    return sqd
end

# """
# Multidimensional equivalent of squared_distance.
# """
# function squared_distance(X::Array{Float64,2},C::Array{Float64,2})
#     sqd = zeros(size(X)[2],size(C)[2])
#     for i in 1:size(X)[2]
#         for j in 1:size(C)[2]
#             sqd[i,j] = sum(X.^2)[i,:] + sum(C.^2)[i,:] -2*sum(X[i,:].*C[j,:])
#         end
#     end
#     return sqd
# end

function lsdd(x::Array{Float64,1},y::Array{Float64,1}; folds = 5, sigma_list = nothing , lambda_list = nothing)
    lx,ly = length(x), length(y)
    b = min(lx+ly,300)
    C = shuffle(vcat(x,y))[1:b]
    CC_dist2 = squared_distance(C,C)
    xC_dist2, yC_dist2 = squared_distance(x,C), squared_distance(y,C)
    Tx,Ty = length(x) - div(lx,folds), length(y) - div(ly,folds)
    #Define the training and testing data sets
    cv_split1, cv_split2 = floor.(collect(1:lx)*folds/lx), floor.(collect(1:ly)*folds/ly)
    cv_index1, cv_index2 = shuffle(cv_split1), shuffle(cv_split2)
    tr_idx1,tr_idx2 = [findall(x->x!=i,cv_index1) for i in 1:folds], [findall(x->x!=i,cv_index2) for i in 1:folds]
    te_idx1,te_idx2 = [findall(x->x==i,cv_index1) for i in 1:folds], [findall(x->x==i,cv_index2) for i in 1:folds]
    xTr_dist, yTr_dist = [xC_dist2[i,:] for i in tr_idx1], [yC_dist2[i,:] for i in tr_idx2]
    xTe_dist, yTe_dist = [xC_dist2[i,:] for i in te_idx1], [yC_dist2[i,:] for i in te_idx2]
    if sigma_list == nothing
        sigma_list = [0.25, 0.5, 0.75, 1, 1.2, 1.5, 2, 2.5, 2.2, 3, 5]
    end
    if lambda_list == nothing
        lambda_list = [1.00000000e-03, 3.16227766e-03, 1.00000000e-02, 3.16227766e-02,
       1.00000000e-01, 3.16227766e-01, 1.00000000e+00, 3.16227766e+00,
       1.00000000e+01]
    end
    #memory prealocation
    score_cv = zeros(length(sigma_list),length(lambda_list))
    H = zeros(b,b)
    hx_tr, hy_tr = [zeros(b,1) for i in 1:folds], [zeros(b,1) for i in 1:folds]
    hx_te, hy_te = [zeros(1,b) for i in 1:folds], [zeros(1,b) for i in 1:folds]
    #h_tr,h_te = zeros(b,1), zeros(1,b)
    theta = zeros(b)

    for (sigma_idx,sigma) in enumerate(sigma_list)
        #the expression of H is different for higher dimension
        #H = sqrt((sigma^2)*pi)*exp.(-CC_dist2/(4*sigma^2))
        set_H(H,CC_dist2,sigma,b)
        #check if the sum is performed along the right dimension
        set_htr(hx_tr,xTr_dist,sigma,Tx), set_htr(hy_tr,yTr_dist,sigma,Ty)
        set_hte(hx_te,xTe_dist,sigma,lx-Tx), set_hte(hy_te,yTe_dist,sigma,ly-Ty)

        for i in 1:folds
            h_tr = hx_tr[i] - hy_tr[i]
            h_te = hx_te[i] - hy_te[i]
            #set_h(h_tr,hx_tr[i],hy_tr[i],b)
            #set_h(h_te,hx_te[i],hy_te[i],b)
            for (lambda_idx,lambda) in enumerate(lambda_list)
                set_theta(theta,H,lambda,h_tr,b)
                score_cv[sigma_idx,lambda_idx] += dot(theta,H*theta) - 2*dot(theta,h_te)
            end
        end
    end
    #retrieve the value of the optimal parameters
    sigma_chosen = sigma_list[findmin(score_cv)[2][2]]
    lambda_chosen = lambda_list[findmin(score_cv)[2][2]]
    #calculating the new "optimal" solution
    H = sqrt((sigma_chosen^2)*pi)*exp.(-CC_dist2/(4*sigma_chosen^2))
    H_lambda = H + lambda_chosen*Matrix{Float64}(I, b, b)
    h = (1/lx)*sum(exp.(-xC_dist2/(2*sigma_chosen^2)),dims = 1) - (1/ly)*sum(exp.(-yC_dist2/(2*sigma_chosen^2)),dims = 1)
    theta_final =  H_lambda\transpose(h)
    f = transpose(theta_final).*sum(exp.(-vcat(xC_dist2,yC_dist2)/(2*sigma_chosen^2)),dims = 1)
    L2 = 2*dot(theta_final,h) - dot(theta_final,H*theta_final)
    return L2
end

function set_H(H::Array{Float64,2},dist::Array{Float64,2},sigma::Float64,b::Int64)
    for i in 1:b
        for j in 1:b
            H[i,j] = sqrt((sigma^2)*pi)*exp(-dist[i,j]/(4*sigma^2))
        end
    end
end

function set_theta(theta::Array{Float64,1},H::Array{Float64,2},lambda::Float64,h::Array{Float64,2},b::Int64)
    Hl = (H + lambda*Matrix{Float64}(I, b, b))
    LAPACK.posv!('L', Hl, h)
    theta = h
end


function set_htr(h::Array{Array{Float64,2},1},dists::Array{Array{Float64,2},1},sigma::Float64,T::Int64)
        for (CVidx,dist) in enumerate(dists)
            for (idx,value) in enumerate((1/T)*sum(exp.(-dist/(2*sigma^2)),dims = 1))
                #h[CVidx][idx] = value
                h[CVidx][idx] = value
            end
        end
end

function set_hte(h::Array{Array{Float64,2},1},dists::Array{Array{Float64,2},1},sigma::Float64,T::Int64)
    for (CVidx,dist) in enumerate(dists)
        for (idx,value) in enumerate((1/T)*sum(exp.(-dist/(2*sigma^2)),dims = 1))
            h[CVidx][idx] = value
        end
    end
end


function set_h(h,h1,h2,b)
    for i in 1:b
        h[i] = h1[i] - h2[i]
    end
end



function getdiff(ts; window = 150)
    diff = []
    for i in 1:(length(ts)-2*window)
        pd1 = [ts[i+k] for k in 1:window]
        pd2 = [ts[i+window+k] for k in 1:window]
        push!(diff,lsdd(pd1,pd2)[1])
    end
    return diff
end

a,b = rand(500),1.5*rand(500)
@time print(lsdd(a,b))

The set_H, set_h and set_theta functions are there because I read somewhere that modifying prealocated memory in place with a function was faster, but it did not make a great difference.
To test it, I use two random distributions as input data :

x,y = rand(500),1.5*rand(500)
lsdd(x,y) #returns a value around 0.3

Now here is the version of the code where I try to use Optimizer :

function Theta(sigma::Float64,lambda::Float64,x::Array{Float64,1},y::Array{Float64,1},folds::Int8)
    lx,ly = length(x), length(y)
    b = min(lx+ly,300)
    C = shuffle(vcat(x,y))[1:b]
    CC_dist2 = squared_distance(C,C)
    xC_dist2, yC_dist2 = squared_distance(x,C), squared_distance(y,C)
    #the subsets are not be mutually exclusive !
    Tx,Ty = length(x) - div(lx,folds), length(y) - div(ly,folds)
    shuffled_x, shuffled_y = [shuffle(1:lx) for i in 1:folds], [shuffle(1:ly) for i in 1:folds]
    cv_index1, cv_index2 = floor.(collect(1:lx)*folds/lx)[shuffle(1:lx)], floor.(collect(1:ly)*folds/ly)[shuffle(1:ly)]
    tr_idx1,tr_idx2 = [i[1:Tx] for i in shuffled_x], [i[1:Ty] for i in shuffled_y]
    te_idx1,te_idx2 = [i[Tx:end] for i in shuffled_x], [i[Ty:end] for i in shuffled_y]
    xTr_dist, yTr_dist = [xC_dist2[i,:] for i in tr_idx1], [yC_dist2[i,:] for i in tr_idx2]
    xTe_dist, yTe_dist = [xC_dist2[i,:] for i in te_idx1], [yC_dist2[i,:] for i in te_idx2]
    score_cv = 0
    Id = Matrix{Float64}(I, b, b)
    H = sqrt((sigma^2)*pi)*exp.(-CC_dist2/(4*sigma^2))
    hx_tr, hy_tr = [transpose((1/Tx)*sum(exp.(-dist/(2*sigma^2)),dims = 1)) for dist in xTr_dist], [transpose((1/Ty)*sum(exp.(-dist/(2*sigma^2)),dims = 1)) for dist in yTr_dist]
    hx_te, hy_te  = [(lx-Tx)*sum(exp.(-dist/(2*sigma^2)),dims = 1) for dist in xTe_dist], [(ly-Ty)*sum(exp.(-dist/(2*sigma^2)),dims = 1) for dist in yTe_dist]
    for i in 1:folds
        h_tr, h_te = hx_tr[i] - hy_tr[i], hx_te[i] - hy_te[i]
        #theta = (H + lambda * Id)\h_tr
        theta = copy(h_tr)
        Hl = (H + lambda*Matrix{Float64}(I, b, b))
        LAPACK.posv!('L', Hl, theta)
        score_cv += dot(theta,H*theta) - 2*dot(theta,h_te)
    end
    return score_cv,(CC_dist2,xC_dist2,yC_dist2)
end

function cost(params::Array{Float64,1},x::Array{Float64,1},y::Array{Float64,1},folds::Int8)
    s,l = params[1],params[2]
    return Theta(s,l,x,y,folds)[1]
end

"""
Performs the optinization
"""

function lsdd3(x::Array{Float64,1},y::Array{Float64,1}; folds = 4)
    start = [1,0.1]
    b = min(length(x)+length(y),300)
    lx,ly = length(x),length(y)
    #result = optimize(params -> cost(params,x,y,folds),fill(0.0,2),fill(50.0,2),start, Fminbox(LBFGS(linesearch=LineSearches.BackTracking())); autodiff = :forward)
    result = optimize(params -> cost(params,x,y,folds),start, BFGS(),Optim.Options(f_calls_limit = 5, iterations = 5))
    #bboptimize(rosenbrock2d; SearchRange = [(-5.0, 5.0), (-2.0, 2.0)])
    #result = optimize(cost,[0,0],[Inf,Inf],start, Fminbox(AcceleratedGradientDescent()))
    sigma_chosen,lambda_chosen = Optim.minimizer(result)
    CC_dist2, xC_dist2, yC_dist2 = Theta(sigma_chosen,lambda_chosen,x,y,folds)[2]
    H = sqrt((sigma_chosen^2)*pi)*exp.(-CC_dist2/(4*sigma_chosen^2))
    h = (1/lx)*sum(exp.(-xC_dist2/(2*sigma_chosen^2)),dims = 1) - (1/ly)*sum(exp.(-yC_dist2/(2*sigma_chosen^2)),dims = 1)
    theta_final = (H + lambda_chosen*Matrix{Float64}(I, b, b))\transpose(h)
    f = transpose(theta_final).*sum(exp.(-vcat(xC_dist2,yC_dist2)/(2*sigma_chosen^2)),dims = 1)
    L2 = 2*dot(theta_final,h) - dot(theta_final,H*theta_final)
    return L2
end

No matter, which kind of option I use in the optimizer, I always end up with something too slow. Maybe the grid search is the best option, but I don't know how to make it faster... Does anyone have an idea how I could proceed further ?

Remark : I already posted this question on stackexchange, where I was pointed out that it might make more sense to post it here.

[1] : http://www.mcduplessis.com/wp-content/uploads/2016/05/Journal-IEICE-2014-CLSDD-1.pdf
[2] : http://www.ms.k.u-tokyo.ac.jp/software.html

This code doesn't run. The types on the set_h* functions are all incorrectly typed. Can you fix this please? — Oscar Smith, Commented Jun 4, 2019 at 15:27
@OscarSmith It runs on my computer ... Which version of julia do you have ? Or maybe this is due to the fact that I forgot to include the packages to import in my post. I corrected that. — Johncowk, Commented Jun 4, 2019 at 16:04
I'm on Julia 1.1.1 and get ERROR: LoadError: MethodError: no method matching set_H(::Array{Float64,2}, ::Array{Float64,2}, ::Float64, ::Int64) Closest candidates are: set_H(::Array{Float64,2}, ::Array{Float64,2}, ::Float64, !Matched::Int16) `` — Oscar Smith, Commented Jun 4, 2019 at 16:09
@OscarSmith Ok that's weird ... I wasn't excpecting to see Int64s, anyways I change the int16 to int64, does it work now ? — Johncowk, Commented Jun 5, 2019 at 11:38
It still errors. Please make sure your code actually runs correctly — Oscar Smith, Commented Jun 5, 2019 at 17:37

Stack Exchange Network

How to speed up simple linear algebra optimization probelm in Julia?

0

Browse other questions tagged
performance
julia
or ask your own question.

Hot Network Questions

How to speed up simple linear algebra optimization probelm in Julia?

0

Know someone who can answer? Share a link to this question via email, Twitter, or Facebook.

Browse other questions tagged performancejulia or ask your own question.

Related

Hot Network Questions

Browse other questions tagged
performance
julia
or ask your own question.