94

I am looking for a sample code implementation on how to invert a 4x4 matrix. I know there is Gaussian eleminiation, LU decomposition, etc., but instead of looking at them in detail I am really just looking for the code to do this.

Language ideally C++, data is available in array of 16 floats in column-major order.

6
  • 3
    Is this homework? If not (e.g. you're just trying to solve Ax=b), then trying to explicitly compute an inverse may not be what you want to do. Commented Jul 18, 2009 at 19:46
  • 11
    it is not homework. it is for a personal project. and i dont want to "waste" time on learning matrix inversion for 4x4 which seems quite complicated compared to 3x3
    – clamp
    Commented Jul 18, 2009 at 20:59
  • 13
    I do not think this is a stupid question that deserves -1 score.
    – stribika
    Commented Jul 18, 2009 at 21:19
  • 4
    if your matrix is a rotation/scaling/translation one, related: stackoverflow.com/questions/155670/… & web.archive.org/web/20130806093214/http://…
    – user719662
    Commented Jan 29, 2015 at 19:48
  • you may also be interested in this if you want a bit more performance lxjk.github.io/2017/09/03/… and you cannot process multiple matrices at a time
    – Sopel
    Commented Feb 24, 2020 at 12:14

11 Answers 11

121

here:

bool gluInvertMatrix(const double m[16], double invOut[16])
{
    double inv[16], det;
    int i;

    inv[0] = m[5]  * m[10] * m[15] - 
             m[5]  * m[11] * m[14] - 
             m[9]  * m[6]  * m[15] + 
             m[9]  * m[7]  * m[14] +
             m[13] * m[6]  * m[11] - 
             m[13] * m[7]  * m[10];

    inv[4] = -m[4]  * m[10] * m[15] + 
              m[4]  * m[11] * m[14] + 
              m[8]  * m[6]  * m[15] - 
              m[8]  * m[7]  * m[14] - 
              m[12] * m[6]  * m[11] + 
              m[12] * m[7]  * m[10];

    inv[8] = m[4]  * m[9] * m[15] - 
             m[4]  * m[11] * m[13] - 
             m[8]  * m[5] * m[15] + 
             m[8]  * m[7] * m[13] + 
             m[12] * m[5] * m[11] - 
             m[12] * m[7] * m[9];

    inv[12] = -m[4]  * m[9] * m[14] + 
               m[4]  * m[10] * m[13] +
               m[8]  * m[5] * m[14] - 
               m[8]  * m[6] * m[13] - 
               m[12] * m[5] * m[10] + 
               m[12] * m[6] * m[9];

    inv[1] = -m[1]  * m[10] * m[15] + 
              m[1]  * m[11] * m[14] + 
              m[9]  * m[2] * m[15] - 
              m[9]  * m[3] * m[14] - 
              m[13] * m[2] * m[11] + 
              m[13] * m[3] * m[10];

    inv[5] = m[0]  * m[10] * m[15] - 
             m[0]  * m[11] * m[14] - 
             m[8]  * m[2] * m[15] + 
             m[8]  * m[3] * m[14] + 
             m[12] * m[2] * m[11] - 
             m[12] * m[3] * m[10];

    inv[9] = -m[0]  * m[9] * m[15] + 
              m[0]  * m[11] * m[13] + 
              m[8]  * m[1] * m[15] - 
              m[8]  * m[3] * m[13] - 
              m[12] * m[1] * m[11] + 
              m[12] * m[3] * m[9];

    inv[13] = m[0]  * m[9] * m[14] - 
              m[0]  * m[10] * m[13] - 
              m[8]  * m[1] * m[14] + 
              m[8]  * m[2] * m[13] + 
              m[12] * m[1] * m[10] - 
              m[12] * m[2] * m[9];

    inv[2] = m[1]  * m[6] * m[15] - 
             m[1]  * m[7] * m[14] - 
             m[5]  * m[2] * m[15] + 
             m[5]  * m[3] * m[14] + 
             m[13] * m[2] * m[7] - 
             m[13] * m[3] * m[6];

    inv[6] = -m[0]  * m[6] * m[15] + 
              m[0]  * m[7] * m[14] + 
              m[4]  * m[2] * m[15] - 
              m[4]  * m[3] * m[14] - 
              m[12] * m[2] * m[7] + 
              m[12] * m[3] * m[6];

    inv[10] = m[0]  * m[5] * m[15] - 
              m[0]  * m[7] * m[13] - 
              m[4]  * m[1] * m[15] + 
              m[4]  * m[3] * m[13] + 
              m[12] * m[1] * m[7] - 
              m[12] * m[3] * m[5];

    inv[14] = -m[0]  * m[5] * m[14] + 
               m[0]  * m[6] * m[13] + 
               m[4]  * m[1] * m[14] - 
               m[4]  * m[2] * m[13] - 
               m[12] * m[1] * m[6] + 
               m[12] * m[2] * m[5];

    inv[3] = -m[1] * m[6] * m[11] + 
              m[1] * m[7] * m[10] + 
              m[5] * m[2] * m[11] - 
              m[5] * m[3] * m[10] - 
              m[9] * m[2] * m[7] + 
              m[9] * m[3] * m[6];

    inv[7] = m[0] * m[6] * m[11] - 
             m[0] * m[7] * m[10] - 
             m[4] * m[2] * m[11] + 
             m[4] * m[3] * m[10] + 
             m[8] * m[2] * m[7] - 
             m[8] * m[3] * m[6];

    inv[11] = -m[0] * m[5] * m[11] + 
               m[0] * m[7] * m[9] + 
               m[4] * m[1] * m[11] - 
               m[4] * m[3] * m[9] - 
               m[8] * m[1] * m[7] + 
               m[8] * m[3] * m[5];

    inv[15] = m[0] * m[5] * m[10] - 
              m[0] * m[6] * m[9] - 
              m[4] * m[1] * m[10] + 
              m[4] * m[2] * m[9] + 
              m[8] * m[1] * m[6] - 
              m[8] * m[2] * m[5];

    det = m[0] * inv[0] + m[1] * inv[4] + m[2] * inv[8] + m[3] * inv[12];

    if (det == 0)
        return false;

    det = 1.0 / det;

    for (i = 0; i < 16; i++)
        invOut[i] = inv[i] * det;

    return true;
}

This was lifted from MESA implementation of the GLU library.

17
  • 11
    You probably wouldn't want it any other way.
    – shoosh
    Commented Jul 19, 2009 at 0:06
  • 41
    Sadly, that code isn't really that straightforward to make in a loopable way to begin with, much less a way that a compiler can adequately unroll. Also, that code comes from a rather old C library which has a LOT of very fiddly optimizations, and it's code that works already (and has been thoroughly tested and proven by thousands of Linux OpenGL programs at this point) so why rewrite it?
    – fluffy
    Commented Aug 22, 2011 at 20:18
  • 17
    Is this for column major or row major ordered matrices?
    – Zoomulator
    Commented Jan 25, 2012 at 19:00
  • 36
    Zoomulator: Awesomely it is for both! This is because inverse(transpose(A)) = transpose(inverse(A)).
    – Timmmm
    Commented Feb 15, 2012 at 20:52
  • 3
    @dfeuer It would take 72 multiplications to get the determinant first. While doing it first would actually be faster in case of non-invertible matrix, for an invertible matrix it saves 68 multiplications by doing it after. Also there's the bonus of having less code this way.
    – crazyjoe
    Commented Jul 31, 2014 at 18:22
34

If anyone looking for more costumized code and "easier to read", then I got this

var A2323 = m.m22 * m.m33 - m.m23 * m.m32 ;
var A1323 = m.m21 * m.m33 - m.m23 * m.m31 ;
var A1223 = m.m21 * m.m32 - m.m22 * m.m31 ;
var A0323 = m.m20 * m.m33 - m.m23 * m.m30 ;
var A0223 = m.m20 * m.m32 - m.m22 * m.m30 ;
var A0123 = m.m20 * m.m31 - m.m21 * m.m30 ;
var A2313 = m.m12 * m.m33 - m.m13 * m.m32 ;
var A1313 = m.m11 * m.m33 - m.m13 * m.m31 ;
var A1213 = m.m11 * m.m32 - m.m12 * m.m31 ;
var A2312 = m.m12 * m.m23 - m.m13 * m.m22 ;
var A1312 = m.m11 * m.m23 - m.m13 * m.m21 ;
var A1212 = m.m11 * m.m22 - m.m12 * m.m21 ;
var A0313 = m.m10 * m.m33 - m.m13 * m.m30 ;
var A0213 = m.m10 * m.m32 - m.m12 * m.m30 ;
var A0312 = m.m10 * m.m23 - m.m13 * m.m20 ;
var A0212 = m.m10 * m.m22 - m.m12 * m.m20 ;
var A0113 = m.m10 * m.m31 - m.m11 * m.m30 ;
var A0112 = m.m10 * m.m21 - m.m11 * m.m20 ;

var det = m.m00 * ( m.m11 * A2323 - m.m12 * A1323 + m.m13 * A1223 ) 
    - m.m01 * ( m.m10 * A2323 - m.m12 * A0323 + m.m13 * A0223 ) 
    + m.m02 * ( m.m10 * A1323 - m.m11 * A0323 + m.m13 * A0123 ) 
    - m.m03 * ( m.m10 * A1223 - m.m11 * A0223 + m.m12 * A0123 ) ;
det = 1 / det;

return new Matrix4x4() {
   m00 = det *   ( m.m11 * A2323 - m.m12 * A1323 + m.m13 * A1223 ),
   m01 = det * - ( m.m01 * A2323 - m.m02 * A1323 + m.m03 * A1223 ),
   m02 = det *   ( m.m01 * A2313 - m.m02 * A1313 + m.m03 * A1213 ),
   m03 = det * - ( m.m01 * A2312 - m.m02 * A1312 + m.m03 * A1212 ),
   m10 = det * - ( m.m10 * A2323 - m.m12 * A0323 + m.m13 * A0223 ),
   m11 = det *   ( m.m00 * A2323 - m.m02 * A0323 + m.m03 * A0223 ),
   m12 = det * - ( m.m00 * A2313 - m.m02 * A0313 + m.m03 * A0213 ),
   m13 = det *   ( m.m00 * A2312 - m.m02 * A0312 + m.m03 * A0212 ),
   m20 = det *   ( m.m10 * A1323 - m.m11 * A0323 + m.m13 * A0123 ),
   m21 = det * - ( m.m00 * A1323 - m.m01 * A0323 + m.m03 * A0123 ),
   m22 = det *   ( m.m00 * A1313 - m.m01 * A0313 + m.m03 * A0113 ),
   m23 = det * - ( m.m00 * A1312 - m.m01 * A0312 + m.m03 * A0112 ),
   m30 = det * - ( m.m10 * A1223 - m.m11 * A0223 + m.m12 * A0123 ),
   m31 = det *   ( m.m00 * A1223 - m.m01 * A0223 + m.m02 * A0123 ),
   m32 = det * - ( m.m00 * A1213 - m.m01 * A0213 + m.m02 * A0113 ),
   m33 = det *   ( m.m00 * A1212 - m.m01 * A0212 + m.m02 * A0112 ),
};

I don't write the code, but my program did. I made a small program to make a program that calculate the determinant and inverse of any N-matrix.

I do it because once in the past I need a code that inverses 5x5 matrix, but nobody in the earth have done this so I made one.

Take a look about the program here.

EDIT: The matrix layout is row-by-row (meaning m01 is in the first row and second column). Also the language is C#, but should be easy to convert into C.

3
  • 3
    116 float multiplications compared with 200 for the accepted answer. (and determinant checking before doing majority of calculation)
    – paddyg
    Commented Jul 16, 2018 at 20:34
  • 2
    This answer feels like a gift from God. You even use the same naming convention for matrix elements as I do.
    – Stuntddude
    Commented Oct 4, 2019 at 2:55
  • 1
    Really good answer, but regarding this " I need a code that inverses 5x5 matrix, but nobody in the earth have done this" --- The reason for that is probably, that it is cheaper to use a direct solver (Gauss, LU) than using a formula based on determinants (Cramers rule?).
    – wychmaster
    Commented Jun 4, 2020 at 8:49
7

I 'rolled up' the MESA implementation (also wrote a couple of unit tests to ensure it actually works).

Here:

float invf(int i,int j,const float* m){

    int o = 2+(j-i);

    i += 4+o;
    j += 4-o;

    #define e(a,b) m[ ((j+b)%4)*4 + ((i+a)%4) ]

    float inv =
     + e(+1,-1)*e(+0,+0)*e(-1,+1)
     + e(+1,+1)*e(+0,-1)*e(-1,+0)
     + e(-1,-1)*e(+1,+0)*e(+0,+1)
     - e(-1,-1)*e(+0,+0)*e(+1,+1)
     - e(-1,+1)*e(+0,-1)*e(+1,+0)
     - e(+1,-1)*e(-1,+0)*e(+0,+1);

    return (o%2)?inv : -inv;

    #undef e

}

bool inverseMatrix4x4(const float *m, float *out)
{

    float inv[16];

    for(int i=0;i<4;i++)
        for(int j=0;j<4;j++)
            inv[j*4+i] = invf(i,j,m);

    double D = 0;

    for(int k=0;k<4;k++) D += m[k] * inv[k*4];

    if (D == 0) return false;

    D = 1.0 / D;

    for (int i = 0; i < 16; i++)
        out[i] = inv[i] * D;

    return true;

}

I wrote a little about this and display the pattern of positive/negative factors on my blog.

As suggested by @LiraNuna, on many platforms hardware accelerated versions of such routines are available so I'm happy to have a 'backup version' that's readable and concise.

Note: this may run 3.5 times slower or worse than the MESA implementation. You can shift the pattern of factors to remove some additions etc... but it would lose in readability and still won't be very fast.

6

If you need a C++ matrix library with a lot of functions, have a look at Eigen library - http://eigen.tuxfamily.org

6

This is the C++ version for @willnode's answer

template<typename Matrix>
static inline void InvertMatrix4(const Matrix& m, Matrix& im, double& det)
{
    double A2323 = m(2, 2) * m(3, 3) - m(2, 3) * m(3, 2);
    double A1323 = m(2, 1) * m(3, 3) - m(2, 3) * m(3, 1);
    double A1223 = m(2, 1) * m(3, 2) - m(2, 2) * m(3, 1);
    double A0323 = m(2, 0) * m(3, 3) - m(2, 3) * m(3, 0);
    double A0223 = m(2, 0) * m(3, 2) - m(2, 2) * m(3, 0);
    double A0123 = m(2, 0) * m(3, 1) - m(2, 1) * m(3, 0);
    double A2313 = m(1, 2) * m(3, 3) - m(1, 3) * m(3, 2);
    double A1313 = m(1, 1) * m(3, 3) - m(1, 3) * m(3, 1);
    double A1213 = m(1, 1) * m(3, 2) - m(1, 2) * m(3, 1);
    double A2312 = m(1, 2) * m(2, 3) - m(1, 3) * m(2, 2);
    double A1312 = m(1, 1) * m(2, 3) - m(1, 3) * m(2, 1);
    double A1212 = m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1);
    double A0313 = m(1, 0) * m(3, 3) - m(1, 3) * m(3, 0);
    double A0213 = m(1, 0) * m(3, 2) - m(1, 2) * m(3, 0);
    double A0312 = m(1, 0) * m(2, 3) - m(1, 3) * m(2, 0);
    double A0212 = m(1, 0) * m(2, 2) - m(1, 2) * m(2, 0);
    double A0113 = m(1, 0) * m(3, 1) - m(1, 1) * m(3, 0);
    double A0112 = m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0);

    det = m(0, 0) * ( m(1, 1) * A2323 - m(1, 2) * A1323 + m(1, 3) * A1223 )
        - m(0, 1) * ( m(1, 0) * A2323 - m(1, 2) * A0323 + m(1, 3) * A0223 )
        + m(0, 2) * ( m(1, 0) * A1323 - m(1, 1) * A0323 + m(1, 3) * A0123 )
        - m(0, 3) * ( m(1, 0) * A1223 - m(1, 1) * A0223 + m(1, 2) * A0123 );
    det = 1 / det;

    im(0, 0) = det *   ( m(1, 1) * A2323 - m(1, 2) * A1323 + m(1, 3) * A1223 );
    im(0, 1) = det * - ( m(0, 1) * A2323 - m(0, 2) * A1323 + m(0, 3) * A1223 );
    im(0, 2) = det *   ( m(0, 1) * A2313 - m(0, 2) * A1313 + m(0, 3) * A1213 );
    im(0, 3) = det * - ( m(0, 1) * A2312 - m(0, 2) * A1312 + m(0, 3) * A1212 );
    im(1, 0) = det * - ( m(1, 0) * A2323 - m(1, 2) * A0323 + m(1, 3) * A0223 );
    im(1, 1) = det *   ( m(0, 0) * A2323 - m(0, 2) * A0323 + m(0, 3) * A0223 );
    im(1, 2) = det * - ( m(0, 0) * A2313 - m(0, 2) * A0313 + m(0, 3) * A0213 );
    im(1, 3) = det *   ( m(0, 0) * A2312 - m(0, 2) * A0312 + m(0, 3) * A0212 );
    im(2, 0) = det *   ( m(1, 0) * A1323 - m(1, 1) * A0323 + m(1, 3) * A0123 );
    im(2, 1) = det * - ( m(0, 0) * A1323 - m(0, 1) * A0323 + m(0, 3) * A0123 );
    im(2, 2) = det *   ( m(0, 0) * A1313 - m(0, 1) * A0313 + m(0, 3) * A0113 );
    im(2, 3) = det * - ( m(0, 0) * A1312 - m(0, 1) * A0312 + m(0, 3) * A0112 );
    im(3, 0) = det * - ( m(1, 0) * A1223 - m(1, 1) * A0223 + m(1, 2) * A0123 );
    im(3, 1) = det *   ( m(0, 0) * A1223 - m(0, 1) * A0223 + m(0, 2) * A0123 );
    im(3, 2) = det * - ( m(0, 0) * A1213 - m(0, 1) * A0213 + m(0, 2) * A0113 );
    im(3, 3) = det *   ( m(0, 0) * A1212 - m(0, 1) * A0212 + m(0, 2) * A0112 );
}
0
2

You can use the GNU Scientific Library or look the code up in it.

Edit: You seem to want the Linear Algebra section.

1
  • i did infact look at the matrix struct from gsl, but it doesnt seem to have a function for determinant or inversion.
    – clamp
    Commented Jul 18, 2009 at 19:43
2

Inspired by @shoosh to check out MESA implementations, I found that matrix inversion looks quite different in more recent mesa releases. I suppose those are good improvements. Here's the matrix inversion code from Mesa-17.3.9:

/* Returns true for success, false for failure (singular matrix) */
bool DirectVolumeRenderer::_mesa_invert_matrix_general( GLfloat out[16], const GLfloat in[16] )
{
    /**
     * References an element of 4x4 matrix.
     * Calculate the linear storage index of the element and references it. 
     */
    #define MAT(m,r,c) (m)[(c)*4+(r)]
    /**
     * Swaps the values of two floating point variables.
     */
    #define SWAP_ROWS(a, b) { GLfloat *_tmp = a; (a)=(b); (b)=_tmp; }

    const GLfloat *m = in;
    GLfloat wtmp[4][8];
    GLfloat m0, m1, m2, m3, s;
    GLfloat *r0, *r1, *r2, *r3;

    r0 = wtmp[0], r1 = wtmp[1], r2 = wtmp[2], r3 = wtmp[3];

    r0[0] = MAT(m,0,0), r0[1] = MAT(m,0,1),
    r0[2] = MAT(m,0,2), r0[3] = MAT(m,0,3),
    r0[4] = 1.0, r0[5] = r0[6] = r0[7] = 0.0,

    r1[0] = MAT(m,1,0), r1[1] = MAT(m,1,1),
    r1[2] = MAT(m,1,2), r1[3] = MAT(m,1,3),
    r1[5] = 1.0, r1[4] = r1[6] = r1[7] = 0.0,

    r2[0] = MAT(m,2,0), r2[1] = MAT(m,2,1),
    r2[2] = MAT(m,2,2), r2[3] = MAT(m,2,3),
    r2[6] = 1.0, r2[4] = r2[5] = r2[7] = 0.0,

    r3[0] = MAT(m,3,0), r3[1] = MAT(m,3,1),
    r3[2] = MAT(m,3,2), r3[3] = MAT(m,3,3),
    r3[7] = 1.0, r3[4] = r3[5] = r3[6] = 0.0;

    /* choose pivot - or die */
    if (fabsf(r3[0])>fabsf(r2[0])) SWAP_ROWS(r3, r2);
    if (fabsf(r2[0])>fabsf(r1[0])) SWAP_ROWS(r2, r1);
    if (fabsf(r1[0])>fabsf(r0[0])) SWAP_ROWS(r1, r0);
    if (0.0F == r0[0])
        return false;

    /* eliminate first variable     */
    m1 = r1[0]/r0[0]; m2 = r2[0]/r0[0]; m3 = r3[0]/r0[0];
    s = r0[1]; r1[1] -= m1 * s; r2[1] -= m2 * s; r3[1] -= m3 * s;
    s = r0[2]; r1[2] -= m1 * s; r2[2] -= m2 * s; r3[2] -= m3 * s;
    s = r0[3]; r1[3] -= m1 * s; r2[3] -= m2 * s; r3[3] -= m3 * s;
    s = r0[4];
    if (s != 0.0F) { r1[4] -= m1 * s; r2[4] -= m2 * s; r3[4] -= m3 * s; }
    s = r0[5];
    if (s != 0.0F) { r1[5] -= m1 * s; r2[5] -= m2 * s; r3[5] -= m3 * s; }
    s = r0[6];
    if (s != 0.0F) { r1[6] -= m1 * s; r2[6] -= m2 * s; r3[6] -= m3 * s; }
    s = r0[7];
    if (s != 0.0F) { r1[7] -= m1 * s; r2[7] -= m2 * s; r3[7] -= m3 * s; }

    /* choose pivot - or die */
    if (fabsf(r3[1])>fabsf(r2[1])) SWAP_ROWS(r3, r2);
    if (fabsf(r2[1])>fabsf(r1[1])) SWAP_ROWS(r2, r1);
    if (0.0F == r1[1])
        return false;

    /* eliminate second variable */
    m2 = r2[1]/r1[1]; m3 = r3[1]/r1[1];
    r2[2] -= m2 * r1[2]; r3[2] -= m3 * r1[2];
    r2[3] -= m2 * r1[3]; r3[3] -= m3 * r1[3];
    s = r1[4]; if (0.0F != s) { r2[4] -= m2 * s; r3[4] -= m3 * s; }
    s = r1[5]; if (0.0F != s) { r2[5] -= m2 * s; r3[5] -= m3 * s; }
    s = r1[6]; if (0.0F != s) { r2[6] -= m2 * s; r3[6] -= m3 * s; }
    s = r1[7]; if (0.0F != s) { r2[7] -= m2 * s; r3[7] -= m3 * s; }

    /* choose pivot - or die */
    if (fabsf(r3[2])>fabsf(r2[2])) SWAP_ROWS(r3, r2);
    if (0.0F == r2[2])
        return false;

    /* eliminate third variable */
    m3 = r3[2]/r2[2];
    r3[3] -= m3 * r2[3], r3[4] -= m3 * r2[4],
    r3[5] -= m3 * r2[5], r3[6] -= m3 * r2[6],
    r3[7] -= m3 * r2[7];

    /* last check */
    if (0.0F == r3[3])
        return false;

    s = 1.0F/r3[3];             /* now back substitute row 3 */
    r3[4] *= s; r3[5] *= s; r3[6] *= s; r3[7] *= s;

    m2 = r2[3];                 /* now back substitute row 2 */
    s  = 1.0F/r2[2];
    r2[4] = s * (r2[4] - r3[4] * m2), r2[5] = s * (r2[5] - r3[5] * m2),
    r2[6] = s * (r2[6] - r3[6] * m2), r2[7] = s * (r2[7] - r3[7] * m2);
    m1 = r1[3];
    r1[4] -= r3[4] * m1, r1[5] -= r3[5] * m1,
    r1[6] -= r3[6] * m1, r1[7] -= r3[7] * m1;
    m0 = r0[3];
    r0[4] -= r3[4] * m0, r0[5] -= r3[5] * m0,
    r0[6] -= r3[6] * m0, r0[7] -= r3[7] * m0;

    m1 = r1[2];                 /* now back substitute row 1 */
    s  = 1.0F/r1[1];
    r1[4] = s * (r1[4] - r2[4] * m1), r1[5] = s * (r1[5] - r2[5] * m1),
    r1[6] = s * (r1[6] - r2[6] * m1), r1[7] = s * (r1[7] - r2[7] * m1);
    m0 = r0[2];
    r0[4] -= r2[4] * m0, r0[5] -= r2[5] * m0,
    r0[6] -= r2[6] * m0, r0[7] -= r2[7] * m0;

    m0 = r0[1];                 /* now back substitute row 0 */
    s  = 1.0F/r0[0];
    r0[4] = s * (r0[4] - r1[4] * m0), r0[5] = s * (r0[5] - r1[5] * m0),
    r0[6] = s * (r0[6] - r1[6] * m0), r0[7] = s * (r0[7] - r1[7] * m0);

    MAT(out,0,0) = r0[4]; MAT(out,0,1) = r0[5],
    MAT(out,0,2) = r0[6]; MAT(out,0,3) = r0[7],
    MAT(out,1,0) = r1[4]; MAT(out,1,1) = r1[5],
    MAT(out,1,2) = r1[6]; MAT(out,1,3) = r1[7],
    MAT(out,2,0) = r2[4]; MAT(out,2,1) = r2[5],
    MAT(out,2,2) = r2[6]; MAT(out,2,3) = r2[7],
    MAT(out,3,0) = r3[4]; MAT(out,3,1) = r3[5],
    MAT(out,3,2) = r3[6]; MAT(out,3,3) = r3[7];

    #undef SWAP_ROWS
    #undef MAT

    return true;
}

Note: you can find this piece of code in the mesa code base: mesa-17.3.9/src/mesa/math/m_matrix.c.

1

Here is a small (just one header) C++ vector math library (geared towards 3D programming). If you use it, keep in mind that layout of its matrices in memory is inverted comparing to what OpenGL expects, I had fun time figuring it out...

1

You can make it faster according to this blog.

#define SUBP(i,j) input[i][j]
#define SUBQ(i,j) input[i][2+j]
#define SUBR(i,j) input[2+i][j]
#define SUBS(i,j) input[2+i][2+j]

#define OUTP(i,j) output[i][j]
#define OUTQ(i,j) output[i][2+j]
#define OUTR(i,j) output[2+i][j]
#define OUTS(i,j) output[2+i][2+j]

#define INVP(i,j) invP[i][j]
#define INVPQ(i,j) invPQ[i][j]
#define RINVP(i,j) RinvP[i][j]
#define INVPQ(i,j) invPQ[i][j]
#define RINVPQ(i,j) RinvPQ[i][j]
#define INVPQR(i,j) invPQR[i][j]
#define INVS(i,j) invS[i][j]

#define MULTI(MAT1, MAT2, MAT3) \
    MAT3(0,0)=MAT1(0,0)*MAT2(0,0) + MAT1(0,1)*MAT2(1,0); \
MAT3(0,1)=MAT1(0,0)*MAT2(0,1) + MAT1(0,1)*MAT2(1,1); \
MAT3(1,0)=MAT1(1,0)*MAT2(0,0) + MAT1(1,1)*MAT2(1,0); \
MAT3(1,1)=MAT1(1,0)*MAT2(0,1) + MAT1(1,1)*MAT2(1,1);

#define INV(MAT1, MAT2) \
    _det = 1.0 / (MAT1(0,0) * MAT1(1,1) - MAT1(0,1) * MAT1(1,0)); \
MAT2(0,0) = MAT1(1,1) * _det; \
MAT2(1,1) = MAT1(0,0) * _det; \
MAT2(0,1) = -MAT1(0,1) * _det; \
MAT2(1,0) = -MAT1(1,0) * _det; \

#define SUBTRACT(MAT1, MAT2, MAT3) \
    MAT3(0,0)=MAT1(0,0) - MAT2(0,0); \
MAT3(0,1)=MAT1(0,1) - MAT2(0,1); \
MAT3(1,0)=MAT1(1,0) - MAT2(1,0); \
MAT3(1,1)=MAT1(1,1) - MAT2(1,1);

#define NEGATIVE(MAT) \
    MAT(0,0)=-MAT(0,0); \
MAT(0,1)=-MAT(0,1); \
MAT(1,0)=-MAT(1,0); \
MAT(1,1)=-MAT(1,1);


void getInvertMatrix(complex<double> input[4][4], complex<double> output[4][4]) {
    complex<double> _det;
    complex<double> invP[2][2];
    complex<double> invPQ[2][2];
    complex<double> RinvP[2][2];
    complex<double> RinvPQ[2][2];
    complex<double> invPQR[2][2];
    complex<double> invS[2][2];


    INV(SUBP, INVP);
    MULTI(SUBR, INVP, RINVP);
    MULTI(INVP, SUBQ, INVPQ);
    MULTI(RINVP, SUBQ, RINVPQ);
    SUBTRACT(SUBS, RINVPQ, INVS);
    INV(INVS, OUTS);
    NEGATIVE(OUTS);
    MULTI(OUTS, RINVP, OUTR);
    MULTI(INVPQ, OUTS, OUTQ);
    MULTI(INVPQ, OUTR, INVPQR);
    SUBTRACT(INVP, INVPQR, OUTP);
}

This is not a complete implementation because P may not be invertible, but you can combine this code with MESA implementation to get a better performance.

1

Adding a 2d case which might be useful for someone else:

inline bool invert4by4matrix(mat m, mat inv_m)
 {

        inv_m->v[0][0] = m->v[1][1] * m->v[2][2] * m->v[3][3] -
                         m->v[1][1] * m->v[2][3] * m->v[3][2] -
                         m->v[2][1] * m->v[1][2] * m->v[3][3] +
                         m->v[2][1] * m->v[1][3] * m->v[3][2] +
                         m->v[3][1] * m->v[1][2] * m->v[2][3] -
                         m->v[3][1] * m->v[1][3] * m->v[2][2];

        inv_m->v[1][0] = -m->v[1][0] * m->v[2][2] * m->v[3][3] +
                         m->v[1][0] * m->v[2][3] * m->v[3][2] +
                         m->v[2][0] * m->v[1][2] * m->v[3][3] -
                         m->v[2][0] * m->v[1][3] * m->v[3][2] -
                         m->v[3][0] * m->v[1][2] * m->v[2][3] +
                         m->v[3][0] * m->v[1][3] * m->v[2][2];

        inv_m->v[2][0] = m->v[1][0] * m->v[2][1] * m->v[3][3] -
                         m->v[1][0] * m->v[2][3] * m->v[3][1] -
                         m->v[2][0] * m->v[1][1] * m->v[3][3] +
                         m->v[2][0] * m->v[1][3] * m->v[3][1] +
                         m->v[3][0] * m->v[1][1] * m->v[2][3] -
                         m->v[3][0] * m->v[1][3] * m->v[2][1];

        inv_m->v[3][0] = -m->v[1][0] * m->v[2][1] * m->v[3][2] +
                         m->v[1][0] * m->v[2][2] * m->v[3][1] +
                         m->v[2][0] * m->v[1][1] * m->v[3][2] -
                         m->v[2][0] * m->v[1][2] * m->v[3][1] -
                         m->v[3][0] * m->v[1][1] * m->v[2][2] +
                         m->v[3][0] * m->v[1][2] * m->v[2][1];

        inv_m->v[0][1] = -m->v[0][1] * m->v[2][2] * m->v[3][3] +
                         m->v[0][1] * m->v[2][3] * m->v[3][2] +
                         m->v[2][1] * m->v[0][2] * m->v[3][3] -
                         m->v[2][1] * m->v[0][3] * m->v[3][2] -
                         m->v[3][1] * m->v[0][2] * m->v[2][3] +
                         m->v[3][1] * m->v[0][3] * m->v[2][2];

        inv_m->v[1][1] = m->v[0][0] * m->v[2][2] * m->v[3][3] -
                         m->v[0][0] * m->v[2][3] * m->v[3][2] -
                         m->v[2][0] * m->v[0][2] * m->v[3][3] +
                         m->v[2][0] * m->v[0][3] * m->v[3][2] +
                         m->v[3][0] * m->v[0][2] * m->v[2][3] -
                         m->v[3][0] * m->v[0][3] * m->v[2][2];

        inv_m->v[2][1] = -m->v[0][0] * m->v[2][1] * m->v[3][3] +
                         m->v[0][0] * m->v[2][3] * m->v[3][1] +
                         m->v[2][0] * m->v[0][1] * m->v[3][3] -
                         m->v[2][0] * m->v[0][3] * m->v[3][1] -
                         m->v[3][0] * m->v[0][1] * m->v[2][3] +
                         m->v[3][0] * m->v[0][3] * m->v[2][1];

        inv_m->v[3][1] = m->v[0][0] * m->v[2][1] * m->v[3][2] -
                         m->v[0][0] * m->v[2][2] * m->v[3][1] -
                         m->v[2][0] * m->v[0][1] * m->v[3][2] +
                         m->v[2][0] * m->v[0][2] * m->v[3][1] +
                         m->v[3][0] * m->v[0][1] * m->v[2][2] -
                         m->v[3][0] * m->v[0][2] * m->v[2][1];

        inv_m->v[0][2] = m->v[0][1] * m->v[1][2] * m->v[3][3] -
                         m->v[0][1] * m->v[1][3] * m->v[3][2] -
                         m->v[1][1] * m->v[0][2] * m->v[3][3] +
                         m->v[1][1] * m->v[0][3] * m->v[3][2] +
                         m->v[3][1] * m->v[0][2] * m->v[1][3] -
                         m->v[3][1] * m->v[0][3] * m->v[1][2];

        inv_m->v[1][2] = -m->v[0][0] * m->v[1][2] * m->v[3][3] +
                         m->v[0][0] * m->v[1][3] * m->v[3][2] +
                         m->v[1][0] * m->v[0][2] * m->v[3][3] -
                         m->v[1][0] * m->v[0][3] * m->v[3][2] -
                         m->v[3][0] * m->v[0][2] * m->v[1][3] +
                         m->v[3][0] * m->v[0][3] * m->v[1][2];

        inv_m->v[2][2] = m->v[0][0] * m->v[1][1] * m->v[3][3] -
                         m->v[0][0] * m->v[1][3] * m->v[3][1] -
                         m->v[1][0] * m->v[0][1] * m->v[3][3] +
                         m->v[1][0] * m->v[0][3] * m->v[3][1] +
                         m->v[3][0] * m->v[0][1] * m->v[1][3] -
                         m->v[3][0] * m->v[0][3] * m->v[1][1];

        inv_m->v[3][2] = -m->v[0][0] * m->v[1][1] * m->v[3][2] +
                         m->v[0][0] * m->v[1][2] * m->v[3][1] +
                         m->v[1][0] * m->v[0][1] * m->v[3][2] -
                         m->v[1][0] * m->v[0][2] * m->v[3][1] -
                         m->v[3][0] * m->v[0][1] * m->v[1][2] +
                         m->v[3][0] * m->v[0][2] * m->v[1][1];

        inv_m->v[0][3] = -m->v[0][1] * m->v[1][2] * m->v[2][3] +
                         m->v[0][1] * m->v[1][3] * m->v[2][2] +
                         m->v[1][1] * m->v[0][2] * m->v[2][3] -
                         m->v[1][1] * m->v[0][3] * m->v[2][2] -
                         m->v[2][1] * m->v[0][2] * m->v[1][3] +
                         m->v[2][1] * m->v[0][3] * m->v[1][2];

        inv_m->v[1][3] = m->v[0][0] * m->v[1][2] * m->v[2][3] -
                         m->v[0][0] * m->v[1][3] * m->v[2][2] -
                         m->v[1][0] * m->v[0][2] * m->v[2][3] +
                         m->v[1][0] * m->v[0][3] * m->v[2][2] +
                         m->v[2][0] * m->v[0][2] * m->v[1][3] -
                         m->v[2][0] * m->v[0][3] * m->v[1][2];

        inv_m->v[2][3] = -m->v[0][0] * m->v[1][1] * m->v[2][3] +
                         m->v[0][0] * m->v[1][3] * m->v[2][1] +
                         m->v[1][0] * m->v[0][1] * m->v[2][3] -
                         m->v[1][0] * m->v[0][3] * m->v[2][1] -
                         m->v[2][0] * m->v[0][1] * m->v[1][3] +
                         m->v[2][0] * m->v[0][3] * m->v[1][1];

        inv_m->v[3][3] = m->v[0][0] * m->v[1][1] * m->v[2][2] -
                         m->v[0][0] * m->v[1][2] * m->v[2][1] -
                         m->v[1][0] * m->v[0][1] * m->v[2][2] +
                         m->v[1][0] * m->v[0][2] * m->v[2][1] +
                         m->v[2][0] * m->v[0][1] * m->v[1][2] -
                         m->v[2][0] * m->v[0][2] * m->v[1][1];

        double det = m->v[0][0] * inv_m->v[0][0] +
                     m->v[0][1] * inv_m->v[1][0] +
                     m->v[0][2] * inv_m->v[2][0] +
                     m->v[0][3] * inv_m->v[3][0];

        if (det == 0)
            return false;

        det = 1.0 / det;

        for (int i = 0; i < 4; i++)
        {
            for (int j = 0; j < 4; j++)
            {
                inv_m->v[i][j] = inv_m->v[i][j] * det;
            }
        }

        return true;
    }
0

If you want to compute the inverse matrix of 4x4 matrix, then I recommend to use a library like OpenGL Mathematics (GLM) :

Anyway, you can do it from scratch. The following implementation is similar to the implementation of glm::inverse, but it is not as highly optimized:

bool InverseMat44( const GLfloat m[16], GLfloat invOut[16] )
{
    float inv[16], det;
    int i;

    inv[0]  =  m[5] * m[10] * m[15] - m[5] * m[11] * m[14] - m[9] * m[6] * m[15] + m[9] * m[7] * m[14] + m[13] * m[6] * m[11] - m[13] * m[7] * m[10];
    inv[4]  = -m[4] * m[10] * m[15] + m[4] * m[11] * m[14] + m[8] * m[6] * m[15] - m[8] * m[7] * m[14] - m[12] * m[6] * m[11] + m[12] * m[7] * m[10];
    inv[8]  =  m[4] * m[9]  * m[15] - m[4] * m[11] * m[13] - m[8] * m[5] * m[15] + m[8] * m[7] * m[13] + m[12] * m[5] * m[11] - m[12] * m[7] * m[9];
    inv[12] = -m[4] * m[9]  * m[14] + m[4] * m[10] * m[13] + m[8] * m[5] * m[14] - m[8] * m[6] * m[13] - m[12] * m[5] * m[10] + m[12] * m[6] * m[9];
    inv[1]  = -m[1] * m[10] * m[15] + m[1] * m[11] * m[14] + m[9] * m[2] * m[15] - m[9] * m[3] * m[14] - m[13] * m[2] * m[11] + m[13] * m[3] * m[10];
    inv[5]  =  m[0] * m[10] * m[15] - m[0] * m[11] * m[14] - m[8] * m[2] * m[15] + m[8] * m[3] * m[14] + m[12] * m[2] * m[11] - m[12] * m[3] * m[10];
    inv[9]  = -m[0] * m[9]  * m[15] + m[0] * m[11] * m[13] + m[8] * m[1] * m[15] - m[8] * m[3] * m[13] - m[12] * m[1] * m[11] + m[12] * m[3] * m[9];
    inv[13] =  m[0] * m[9]  * m[14] - m[0] * m[10] * m[13] - m[8] * m[1] * m[14] + m[8] * m[2] * m[13] + m[12] * m[1] * m[10] - m[12] * m[2] * m[9];
    inv[2]  =  m[1] * m[6]  * m[15] - m[1] * m[7]  * m[14] - m[5] * m[2] * m[15] + m[5] * m[3] * m[14] + m[13] * m[2] * m[7]  - m[13] * m[3] * m[6];
    inv[6]  = -m[0] * m[6]  * m[15] + m[0] * m[7]  * m[14] + m[4] * m[2] * m[15] - m[4] * m[3] * m[14] - m[12] * m[2] * m[7]  + m[12] * m[3] * m[6];
    inv[10] =  m[0] * m[5]  * m[15] - m[0] * m[7]  * m[13] - m[4] * m[1] * m[15] + m[4] * m[3] * m[13] + m[12] * m[1] * m[7]  - m[12] * m[3] * m[5];
    inv[14] = -m[0] * m[5]  * m[14] + m[0] * m[6]  * m[13] + m[4] * m[1] * m[14] - m[4] * m[2] * m[13] - m[12] * m[1] * m[6]  + m[12] * m[2] * m[5];
    inv[3]  = -m[1] * m[6]  * m[11] + m[1] * m[7]  * m[10] + m[5] * m[2] * m[11] - m[5] * m[3] * m[10] - m[9]  * m[2] * m[7]  + m[9]  * m[3] * m[6];
    inv[7]  =  m[0] * m[6]  * m[11] - m[0] * m[7]  * m[10] - m[4] * m[2] * m[11] + m[4] * m[3] * m[10] + m[8]  * m[2] * m[7]  - m[8]  * m[3] * m[6];
    inv[11] = -m[0] * m[5]  * m[11] + m[0] * m[7]  * m[9]  + m[4] * m[1] * m[11] - m[4] * m[3] * m[9]  - m[8]  * m[1] * m[7]  + m[8]  * m[3] * m[5];
    inv[15] =  m[0] * m[5]  * m[10] - m[0] * m[6]  * m[9]  - m[4] * m[1] * m[10] + m[4] * m[2] * m[9]  + m[8]  * m[1] * m[6]  - m[8]  * m[2] * m[5];

    det = m[0] * inv[0] + m[1] * inv[4] + m[2] * inv[8] + m[3] * inv[12];
    if (det == 0) return false;
    det = 1.0 / det;

    for (i = 0; i < 16; i++)
        invOut[i] = inv[i] * det;

    return true;
}

Not the answer you're looking for? Browse other questions tagged or ask your own question.