8.7 Exploiting Structure in Nonlinear Convex Programs

The solvers gp(), gp() and cp() are interfaces to nlcp(), which can also be called directly but requires user-provided functions for evaluating the constraint and for solving the KKT equations.

nlcp( kktsolver, F[, G, h[, A, b]])
Solves the nonlinear convex optimization problem (8.3).

The meaning of the arguments h and b is the same as for cp(). The arguments kktsolver, F, G and A are functions that must handle the following calling sequences.

As an example, we consider the 1-norm regularized least-squares problem

\begin{displaymath}
\begin{array}{ll}
\mbox{minimize} & \Vert Ax - y\Vert _2^2 + \Vert x\Vert _1
\end{array}\end{displaymath}

with variable x. The problem is equivalent to the quadratic program

\begin{displaymath}
\begin{array}{ll}
\mbox{minimize} & \Vert Ax - y\Vert _2^2...
... u \\
\mbox{subject to} & -u \preceq x \preceq u
\end{array}\end{displaymath}

with variables x and u. The implementation below is efficient when A has many more columns than rows.

from cvxopt.base import matrix, spmatrix, mul, div
from cvxopt import blas, lapack, solvers

m, n = A.size
def F(x=None):
    """
    Function and gradient evaluation of

	f = || A*x[:n] - y ||_2^2 +  sum(x[n:])
    """

    nvars = 2*n
    if x is None: return 0, matrix(0.0, (nvars,1))
    r = A*x[:n] - y
    f = blas.nrm2(r)**2 + sum(x[n:])
    gradf = matrix(1.0, (1,2*n))
    blas.gemv(A, r, gradf, alpha=2.0, trans='T')  
    return f, +gradf


def G(u, v, alpha=1.0, beta=0.0, trans='N'):
    """
	v := alpha*[I, -I; -I, -I] * u + beta * v  (trans = 'N' or 'T')
    """

    v *= beta
    v[:n] += alpha*(u[:n] - u[n:])
    v[n:] += alpha*(-u[:n] - u[n:])

h = matrix(0.0, (2*n,1))


# Customized solver for the KKT system 
#
#     [  2.0*z[0]*A'*A  0    I      -I     ] [x[:n] ]     [bx[:n] ]
#     [  0              0   -I      -I     ] [x[n:] ]  =  [bx[n:] ].
#     [  I             -I   -D1^-1   0     ] [zl[:n]]     [bzl[:n]]
#     [ -I             -I    0      -D2^-1 ] [zl[n:]]     [bzl[n:]]
#
#    
# We first eliminate zl and x[n:]:
#
#     ( 2*z[0]*A'*A + 4*D1*D2*(D1+D2)^-1 ) * x[:n] = bx[:n] - (D2-D1)*(D1+D2)^-1 * bx[n:] 
#         + D1 * ( I + (D2-D1)*(D1+D2)^-1 ) * bzl[:n] - D2 * ( I - (D2-D1)*(D1+D2)^-1 ) * bzl[n:]           
#
#     x[n:] = (D1+D2)^-1 * ( bx[n:] - D1*bzl[:n]  - D2*bzl[n:] ) - (D2-D1)*(D1+D2)^-1 * x[:n]         
#     zl[:n] = D1 * ( x[:n] - x[n:] - bzl[:n] )
#     zl[n:] = D2 * (-x[:n] - x[n:] - bzl[n:] ).
#
# The first equation has the form
#
#     (z[0]*A'*A + D)*x[:n]  =  rhs
#
# and is equivalent to
#
#     [ D    A'       ] [ x:n] ]  = [ rhs ]
#     [ A   -1/z[0]*I ] [ v    ]    [ 0   ].
#
# It can be solved as 
#
#     ( A*D^-1*A' + 1/z[0]*I ) * v = A * D^-1 * rhs
#     x[:n] = D^-1 * ( rhs - A'*v ).

S = matrix(0.0, (m,m))
Asc = matrix(0.0, (m,n))
v = matrix(0.0, (m,1))
def kktsolver(x, z, dnl, dl):

    # Factor 
    #
    #     S = A*D^-1*A' + 1/z[0]*I 
    #
    # where D = 2*D1*D2*(D1+D2)^-1, D1 = dl[:n]**2, D2 = dl[n:]**2.

    d1, d2 = dl[:n]**2, dl[n:]**2    # d1 = diag(D1), d2 = diag(D2)
    # ds is square root of diagonal of D
    ds = sqrt(2.0) * div( mul(dl[:n], dl[n:]), sqrt(d1+d2) )
    d3 =  div(d2 - d1, d1 + d2)
 
    # Asc = A*diag(d)^-1/2
    Asc = A * spmatrix( ds**-1, range(n), range(n))

    # S = 1/z[0]*I + A * D^-1 * A'
    blas.syrk(Asc, S)
    S[::m+1] += 1.0 / z[0] 
    lapack.potrf(S)

    def g(x, y, znl, zl):

        x[:n] = 0.5 * ( x[:n] - mul(d3, x[n:]) + mul(d1, zl[:n] + mul(d3, zl[:n])) - mul(d2, zl[n:] - mul(d3, zl[n:])) )
        x[:n] = div( x[:n], ds) 

        # Solve
        #
        #     S * v = 0.5 * A * D^-1 * ( bx[:n] - (D2-D1)*(D1+D2)^-1 * bx[n:] 
        #             + D1 * ( I + (D2-D1)*(D1+D2)^-1 ) * bzl[:n] - D2 * ( I - (D2-D1)*(D1+D2)^-1 ) * bzl[n:] )
	    
        blas.gemv(Asc, x, v)
        lapack.potrs(S, v)
	
        # x[:n] = D^-1 * ( rhs - A'*v ).
        blas.gemv(Asc, v, x, alpha=-1.0, beta=1.0, trans='T')
        x[:n] = div(x[:n], ds)

        # x[n:] = (D1+D2)^-1 * ( bx[n:] - D1*bzl[:n]  - D2*bzl[n:] ) - (D2-D1)*(D1+D2)^-1 * x[:n]         
        x[n:] = div( x[n:] - mul(d1, zl[:n]) - mul(d2, zl[n:]), d1+d2 ) - mul( d3, x[:n] )
	    
        # zl[:n] = D1 * (  x[:n] - x[n:] - bzl[:n] )
        # zl[n:] = D2 * ( -x[:n] - x[n:] - bzl[n:] ).
        zl[:n] = mul( d1,  x[:n] - x[n:] - zl[:n] ) 
        zl[n:] = mul( d2, -x[:n] - x[n:] - zl[n:] ) 

    return g

x = solvers.nlcp(kktsolver, F, G, h)['x'][:n]