Beddell, Thomas Edmund
2014-08-07 12:54:52 UTC
Hi,
I made a 3D math library in Lua and am very pleased with its performance under LuaJIT 2.0.3. In my test with LuaJIT 2.0.3, I can do 300,000 4x4 matrix inverse-and-multiply operations in 0.36 seconds.
The same code took 6 seconds in vanilla Lua 5.0. However, according to my co-worker, "equivalent" code takes 0.04 seconds in C++.
I also tried using the FFI with float[16] arrays for the values, and also a struct with metamethods, but both were twice as slow as the Lua table version.
Being within an order of magnitude of C's speed is not bad at all for a scripting language. Is there room for improvement in my code, or should I consider it as good as it can be?
I include the test program below.
Best regards,
Thomas Beddell
-- 4x4 matrix of doubles stored as a flat table of 16 numbers:
-- 1-based indices, column major.
matrix = {}
-- The table is its own metatable, so metamethods stored on it (such as
-- __call) apply when `matrix` itself is used.
setmetatable(matrix, matrix)
-- Failed key lookups fall back to `matrix`, which is how tables that use it
-- as their metatable find the methods stored on it.
matrix.__index = matrix
-- Create a matrix object.
-- Runs when a table whose metatable is `matrix` is called, e.g. `matrix()` or
-- `matrix(a1, a2, ..., a16)`. `self` (the called table) is not used.
-- With arguments, they become the elements of the new matrix in the order
-- given; without arguments, a 16-element all-zero matrix is produced.
-- The number of arguments is not checked.
matrix.__call = function(self, ...)
  local values = {...}
  if #values ~= 0 then
    -- Caller supplied the elements: just attach the metatable.
    return setmetatable(values, matrix)
  end
  -- No elements supplied: start from the zero matrix.
  return setmetatable({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, matrix)
end
-- Produce an identity matrix.
-- Note: the receiver is NOT modified. A brand-new zero matrix is created,
-- its diagonal slots (1, 6, 11, 16) are set to 1, and that new matrix is
-- returned. `self` is accepted only so the function can be called with `:`.
matrix.identity = function(self)
  local result = matrix()
  result[1], result[6], result[11], result[16] = 1, 1, 1, 1
  return result
end
-- Inverse of matrix, computed by cofactor (adjugate) expansion.
-- Returns a new matrix holding the inverse of `self`; `self` is not modified.
-- If the determinant is exactly 0 (singular matrix) no inverse exists and the
-- receiver itself is returned unchanged.
matrix.inverse = function(self)
  -- Pre-sized with a constructor so that the out-of-order writes below all
  -- land in already-allocated slots.
  local out = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}

  out[1] = self[6] * self[11] * self[16] -
    self[6] * self[12] * self[15] -
    self[10] * self[7] * self[16] +
    self[10] * self[8] * self[15] +
    self[14] * self[7] * self[12] -
    self[14] * self[8] * self[11]
  out[5] = -self[5] * self[11] * self[16] +
    self[5] * self[12] * self[15] +
    self[9] * self[7] * self[16] -
    self[9] * self[8] * self[15] -
    self[13] * self[7] * self[12] +
    self[13] * self[8] * self[11]
  out[9] = self[5] * self[10] * self[16] -
    self[5] * self[12] * self[14] -
    self[9] * self[6] * self[16] +
    self[9] * self[8] * self[14] +
    self[13] * self[6] * self[12] -
    self[13] * self[8] * self[10]
  out[13] = -self[5] * self[10] * self[15] +
    self[5] * self[11] * self[14] +
    self[9] * self[6] * self[15] -
    self[9] * self[7] * self[14] -
    self[13] * self[6] * self[11] +
    self[13] * self[7] * self[10]
  out[2] = -self[2] * self[11] * self[16] +
    self[2] * self[12] * self[15] +
    self[10] * self[3] * self[16] -
    self[10] * self[4] * self[15] -
    self[14] * self[3] * self[12] +
    self[14] * self[4] * self[11]
  out[6] = self[1] * self[11] * self[16] -
    self[1] * self[12] * self[15] -
    self[9] * self[3] * self[16] +
    self[9] * self[4] * self[15] +
    self[13] * self[3] * self[12] -
    self[13] * self[4] * self[11]
  out[10] = -self[1] * self[10] * self[16] +
    self[1] * self[12] * self[14] +
    self[9] * self[2] * self[16] -
    self[9] * self[4] * self[14] -
    self[13] * self[2] * self[12] +
    self[13] * self[4] * self[10]
  out[14] = self[1] * self[10] * self[15] -
    self[1] * self[11] * self[14] -
    self[9] * self[2] * self[15] +
    self[9] * self[3] * self[14] +
    self[13] * self[2] * self[11] -
    self[13] * self[3] * self[10]
  out[3] = self[2] * self[7] * self[16] -
    self[2] * self[8] * self[15] -
    self[6] * self[3] * self[16] +
    self[6] * self[4] * self[15] +
    self[14] * self[3] * self[8] -
    self[14] * self[4] * self[7]
  out[7] = -self[1] * self[7] * self[16] +
    self[1] * self[8] * self[15] +
    self[5] * self[3] * self[16] -
    self[5] * self[4] * self[15] -
    self[13] * self[3] * self[8] +
    self[13] * self[4] * self[7]
  out[11] = self[1] * self[6] * self[16] -
    self[1] * self[8] * self[14] -
    self[5] * self[2] * self[16] +
    self[5] * self[4] * self[14] +
    self[13] * self[2] * self[8] -
    self[13] * self[4] * self[6]
  out[15] = -self[1] * self[6] * self[15] +
    self[1] * self[7] * self[14] +
    self[5] * self[2] * self[15] -
    self[5] * self[3] * self[14] -
    self[13] * self[2] * self[7] +
    self[13] * self[3] * self[6]
  out[4] = -self[2] * self[7] * self[12] +
    self[2] * self[8] * self[11] +
    self[6] * self[3] * self[12] -
    self[6] * self[4] * self[11] -
    self[10] * self[3] * self[8] +
    self[10] * self[4] * self[7]
  out[8] = self[1] * self[7] * self[12] -
    self[1] * self[8] * self[11] -
    self[5] * self[3] * self[12] +
    self[5] * self[4] * self[11] +
    self[9] * self[3] * self[8] -
    self[9] * self[4] * self[7]
  out[12] = -self[1] * self[6] * self[12] +
    self[1] * self[8] * self[10] +
    self[5] * self[2] * self[12] -
    self[5] * self[4] * self[10] -
    self[9] * self[2] * self[8] +
    self[9] * self[4] * self[6]
  out[16] = self[1] * self[6] * self[11] -
    self[1] * self[7] * self[10] -
    self[5] * self[2] * self[11] +
    self[5] * self[3] * self[10] +
    self[9] * self[2] * self[7] -
    self[9] * self[3] * self[6]

  -- Determinant from the first four source elements and their cofactors.
  local det = self[1] * out[1] + self[2] * out[5] + self[3] * out[9] + self[4] * out[13]
  -- Singular matrix: hand back the receiver untouched.
  if det == 0 then return self end

  -- Scale the adjugate by 1/det; one division, sixteen multiplications.
  det = 1.0 / det
  for i = 1, 16 do
    out[i] = out[i] * det
  end

  -- Attach the metatable to the table we already filled. This avoids pushing
  -- 16 values through varargs into a second table, and avoids the global
  -- `unpack`, which only exists in Lua 5.1 / LuaJIT.
  return setmetatable(out, matrix)
end
-- Multiply matrix by a matrix (implements the `*` operator).
-- A new matrix is returned; neither operand is modified.
-- For each group of four consecutive slots of `self` (offsets 0, 4, 8, 12),
-- slot `offset + k` of the result is the dot product of that group with
-- slots k, k+4, k+8, k+12 of `m`.
-- NOTE(review): if storage is column major, this appears to evaluate
-- m x self rather than self x m -- confirm the intended operand order.
matrix.__mul = function(self, m)
  local product = matrix()
  for group = 0, 3 do
    local offset = group * 4
    for k = 1, 4 do
      product[offset + k] = m[k] * self[offset + 1] + m[k + 4] * self[offset + 2] + m[k + 8] * self[offset + 3] + m[k + 12] * self[offset + 4]
    end
  end
  return product
end
-- Test: time 300,000 rounds of "invert the identity matrix, then multiply"
-- and print the elapsed CPU time reported by os.clock().
local t = os.clock()
local m = matrix():identity()
local mOut  -- receives each product; only written, never read
for _ = 1, 300000 do
  mOut = m * m:inverse()
end
local time = os.clock() - t
print(time)
I made a 3D math library in Lua and am very pleased with its performance under LuaJIT 2.0.3. In my test with LuaJIT 2.0.3, I can do 300,000 4x4 matrix inverse-and-multiply operations in 0.36 seconds.
The same code took 6 seconds in vanilla Lua 5.0. However, according to my co-worker, "equivalent" code takes 0.04 seconds in C++.
I also tried using the FFI with float[16] arrays for the values, and also a struct with metamethods, but both were twice as slow as the Lua table version.
Being within an order of magnitude of C's speed is not bad at all for a scripting language. Is there room for improvement in my code, or should I consider it as good as it can be?
I include the test program below.
Best regards,
Thomas Beddell
-- 4x4 matrix of doubles stored as a flat table of 16 numbers:
-- 1-based indices, column major.
matrix = {}
-- The table is its own metatable, so metamethods stored on it (such as
-- __call) apply when `matrix` itself is used.
setmetatable(matrix, matrix)
-- Failed key lookups fall back to `matrix`, which is how tables that use it
-- as their metatable find the methods stored on it.
matrix.__index = matrix
-- Create a matrix object.
-- Runs when a table whose metatable is `matrix` is called, e.g. `matrix()` or
-- `matrix(a1, a2, ..., a16)`. `self` (the called table) is not used.
-- With arguments, they become the elements of the new matrix in the order
-- given; without arguments, a 16-element all-zero matrix is produced.
-- The number of arguments is not checked.
matrix.__call = function(self, ...)
  local values = {...}
  if #values ~= 0 then
    -- Caller supplied the elements: just attach the metatable.
    return setmetatable(values, matrix)
  end
  -- No elements supplied: start from the zero matrix.
  return setmetatable({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, matrix)
end
-- Produce an identity matrix.
-- Note: the receiver is NOT modified. A brand-new zero matrix is created,
-- its diagonal slots (1, 6, 11, 16) are set to 1, and that new matrix is
-- returned. `self` is accepted only so the function can be called with `:`.
matrix.identity = function(self)
  local result = matrix()
  result[1], result[6], result[11], result[16] = 1, 1, 1, 1
  return result
end
-- Inverse of matrix, computed by cofactor (adjugate) expansion.
-- Returns a new matrix holding the inverse of `self`; `self` is not modified.
-- If the determinant is exactly 0 (singular matrix) no inverse exists and the
-- receiver itself is returned unchanged.
matrix.inverse = function(self)
  -- Pre-sized with a constructor so that the out-of-order writes below all
  -- land in already-allocated slots.
  local out = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}

  out[1] = self[6] * self[11] * self[16] -
    self[6] * self[12] * self[15] -
    self[10] * self[7] * self[16] +
    self[10] * self[8] * self[15] +
    self[14] * self[7] * self[12] -
    self[14] * self[8] * self[11]
  out[5] = -self[5] * self[11] * self[16] +
    self[5] * self[12] * self[15] +
    self[9] * self[7] * self[16] -
    self[9] * self[8] * self[15] -
    self[13] * self[7] * self[12] +
    self[13] * self[8] * self[11]
  out[9] = self[5] * self[10] * self[16] -
    self[5] * self[12] * self[14] -
    self[9] * self[6] * self[16] +
    self[9] * self[8] * self[14] +
    self[13] * self[6] * self[12] -
    self[13] * self[8] * self[10]
  out[13] = -self[5] * self[10] * self[15] +
    self[5] * self[11] * self[14] +
    self[9] * self[6] * self[15] -
    self[9] * self[7] * self[14] -
    self[13] * self[6] * self[11] +
    self[13] * self[7] * self[10]
  out[2] = -self[2] * self[11] * self[16] +
    self[2] * self[12] * self[15] +
    self[10] * self[3] * self[16] -
    self[10] * self[4] * self[15] -
    self[14] * self[3] * self[12] +
    self[14] * self[4] * self[11]
  out[6] = self[1] * self[11] * self[16] -
    self[1] * self[12] * self[15] -
    self[9] * self[3] * self[16] +
    self[9] * self[4] * self[15] +
    self[13] * self[3] * self[12] -
    self[13] * self[4] * self[11]
  out[10] = -self[1] * self[10] * self[16] +
    self[1] * self[12] * self[14] +
    self[9] * self[2] * self[16] -
    self[9] * self[4] * self[14] -
    self[13] * self[2] * self[12] +
    self[13] * self[4] * self[10]
  out[14] = self[1] * self[10] * self[15] -
    self[1] * self[11] * self[14] -
    self[9] * self[2] * self[15] +
    self[9] * self[3] * self[14] +
    self[13] * self[2] * self[11] -
    self[13] * self[3] * self[10]
  out[3] = self[2] * self[7] * self[16] -
    self[2] * self[8] * self[15] -
    self[6] * self[3] * self[16] +
    self[6] * self[4] * self[15] +
    self[14] * self[3] * self[8] -
    self[14] * self[4] * self[7]
  out[7] = -self[1] * self[7] * self[16] +
    self[1] * self[8] * self[15] +
    self[5] * self[3] * self[16] -
    self[5] * self[4] * self[15] -
    self[13] * self[3] * self[8] +
    self[13] * self[4] * self[7]
  out[11] = self[1] * self[6] * self[16] -
    self[1] * self[8] * self[14] -
    self[5] * self[2] * self[16] +
    self[5] * self[4] * self[14] +
    self[13] * self[2] * self[8] -
    self[13] * self[4] * self[6]
  out[15] = -self[1] * self[6] * self[15] +
    self[1] * self[7] * self[14] +
    self[5] * self[2] * self[15] -
    self[5] * self[3] * self[14] -
    self[13] * self[2] * self[7] +
    self[13] * self[3] * self[6]
  out[4] = -self[2] * self[7] * self[12] +
    self[2] * self[8] * self[11] +
    self[6] * self[3] * self[12] -
    self[6] * self[4] * self[11] -
    self[10] * self[3] * self[8] +
    self[10] * self[4] * self[7]
  out[8] = self[1] * self[7] * self[12] -
    self[1] * self[8] * self[11] -
    self[5] * self[3] * self[12] +
    self[5] * self[4] * self[11] +
    self[9] * self[3] * self[8] -
    self[9] * self[4] * self[7]
  out[12] = -self[1] * self[6] * self[12] +
    self[1] * self[8] * self[10] +
    self[5] * self[2] * self[12] -
    self[5] * self[4] * self[10] -
    self[9] * self[2] * self[8] +
    self[9] * self[4] * self[6]
  out[16] = self[1] * self[6] * self[11] -
    self[1] * self[7] * self[10] -
    self[5] * self[2] * self[11] +
    self[5] * self[3] * self[10] +
    self[9] * self[2] * self[7] -
    self[9] * self[3] * self[6]

  -- Determinant from the first four source elements and their cofactors.
  local det = self[1] * out[1] + self[2] * out[5] + self[3] * out[9] + self[4] * out[13]
  -- Singular matrix: hand back the receiver untouched.
  if det == 0 then return self end

  -- Scale the adjugate by 1/det; one division, sixteen multiplications.
  det = 1.0 / det
  for i = 1, 16 do
    out[i] = out[i] * det
  end

  -- Attach the metatable to the table we already filled. This avoids pushing
  -- 16 values through varargs into a second table, and avoids the global
  -- `unpack`, which only exists in Lua 5.1 / LuaJIT.
  return setmetatable(out, matrix)
end
-- Multiply matrix by a matrix (implements the `*` operator).
-- A new matrix is returned; neither operand is modified.
-- For each group of four consecutive slots of `self` (offsets 0, 4, 8, 12),
-- slot `offset + k` of the result is the dot product of that group with
-- slots k, k+4, k+8, k+12 of `m`.
-- NOTE(review): if storage is column major, this appears to evaluate
-- m x self rather than self x m -- confirm the intended operand order.
matrix.__mul = function(self, m)
  local product = matrix()
  for group = 0, 3 do
    local offset = group * 4
    for k = 1, 4 do
      product[offset + k] = m[k] * self[offset + 1] + m[k + 4] * self[offset + 2] + m[k + 8] * self[offset + 3] + m[k + 12] * self[offset + 4]
    end
  end
  return product
end
-- Test: time 300,000 rounds of "invert the identity matrix, then multiply"
-- and print the elapsed CPU time reported by os.clock().
local t = os.clock()
local m = matrix():identity()
local mOut  -- receives each product; only written, never read
for _ = 1, 300000 do
  mOut = m * m:inverse()
end
local time = os.clock() - t
print(time)