1.1.0/helper__funcs_8hpp_source.html

 #ifndef ICP_HELPERFUNCS_HPP

 #define ICP_HELPERFUNCS_HPP


 #include <algorithm>
 #include <cassert>
 #include <cmath>
 #include <cstddef>
 #include <cstdint>
 #include <functional>
 #include <iomanip>
 #include <iostream>
 #include <numeric>

 #include <RBC/data_types.hpp>


 #if defined(__APPLE__) || defined(__MACOSX)

 #include <OpenCL/cl.hpp>

 #else

 #include <CL/cl.hpp>

 #endif


 namespace ICP

 {


     /*! \brief Checks the command line arguments for the profiling flag, `--profiling`.
      *  \note Only the declaration lives in this header; the definition is in the
      *        accompanying source file.
      *
      *  \param[in] argc number of command line arguments.
      *  \param[in] argv the command line arguments.
      *  \return presumably `true` when the flag was found - TODO confirm against the definition.
      */
     bool setProfilingFlag (int argc, char **argv);


     /*! \brief Returns the first power of 2 greater than or equal to the input.
      *  \details Fractional inputs are rounded up before the search, so the result
      *           is never smaller than `num` (e.g. 4.5 yields 8, not 4).
      *
      *  \param[in] num a non-negative number.
      *  \return The smallest power of 2 that is `>= num`, or 0 when that power
      *          does not fit in 64 bits (i.e. `num > 2^63`).
      */
     template <typename T>
     uint64_t nextPow2 (T num)
     {
         assert (num >= 0);

         // The integer conversion truncates; bump the target when a fractional
         // part was dropped so that the result cannot end up below `num`.
         uint64_t target = static_cast<uint64_t> (num);
         if (static_cast<T> (target) < num)
             ++target;

         // Shifting past 2^63 wraps `pow` to 0. Without the `pow != 0` guard the
         // loop would then spin forever, since 0 stays below any large target.
         uint64_t pow = 1;
         while (pow != 0 && pow < target)
             pow <<= 1;

         return pow;
     }


     /*! \brief Prints an array of an integer type to standard output.
      *  \details The array is printed row by row, each element right-aligned in a
      *           field of `3 * sizeof (T)` characters and followed by a space. The
      *           unary `+` promotes character-sized types so that they are printed
      *           as numbers rather than as characters.
      *
      *  \param[in] title header line printed before the array.
      *  \param[in] ptr array of `width * height` elements in row-major order.
      *  \param[in] width number of elements per row.
      *  \param[in] height number of rows.
      */
     template <typename T>
     void printBuffer (const char *title, T *ptr, uint32_t width, uint32_t height)
     {
         std::cout << title << '\n';

         const int fieldWidth = static_cast<int> (3 * sizeof (T));

         // Unsigned counters match the unsigned bounds, and the row offset is
         // computed in size_t so large buffers do not wrap a 32-bit index.
         for (uint32_t row = 0; row < height; ++row)
         {
             const T *line = ptr + static_cast<std::size_t> (row) * width;

             for (uint32_t col = 0; col < width; ++col)
                 std::cout << std::setw (fieldWidth) << +line[col] << " ";

             std::cout << '\n';
         }

         // A single flush at the end is enough.
         std::cout << std::endl;
     }


     /*! \brief Prints an array of floating-point type to standard output.
      *  \details The array is printed row by row in fixed notation, each element
      *           right-aligned in a field of `5 + prec` characters and followed by
      *           a space. The format flags and the precision of `std::cout` are
      *           restored before returning.
      *
      *  \param[in] title header line printed before the array.
      *  \param[in] ptr array of `width * height` elements in row-major order.
      *  \param[in] width number of elements per row.
      *  \param[in] height number of rows.
      *  \param[in] prec number of decimal places to print.
      */
     template <typename T>
     void printBufferF (const char *title, T *ptr, uint32_t width, uint32_t height, uint32_t prec)
     {
         // flags () does not cover the precision, so it is saved separately;
         // otherwise the precision set below would leak into the caller's output.
         const std::ios::fmtflags savedFlags (std::cout.flags ());
         const std::streamsize savedPrecision = std::cout.precision ();

         std::cout << title << '\n';
         std::cout << std::fixed << std::setprecision (static_cast<int> (prec));

         const int fieldWidth = static_cast<int> (5 + prec);

         // Unsigned counters match the unsigned bounds, and the row offset is
         // computed in size_t so large buffers do not wrap a 32-bit index.
         for (uint32_t row = 0; row < height; ++row)
         {
             const T *line = ptr + static_cast<std::size_t> (row) * width;

             for (uint32_t col = 0; col < width; ++col)
                 std::cout << std::setw (fieldWidth) << line[col] << " ";

             std::cout << '\n';
         }

         std::cout << std::endl;

         std::cout.flags (savedFlags);
         std::cout.precision (savedPrecision);
     }


     /*! \brief Reduces each row of an array to a single element.
      *  \details For every row, the first element is the initial record; each
      *           following element replaces the record whenever
      *           `func (element, record)` returns `true`.
      *
      *  \param[in] in input array of `cols * rows` elements in row-major order.
      *  \param[out] out output array with one element per row.
      *  \param[in] cols number of columns. If it is 0 there is nothing to reduce
      *             and `out` is left untouched.
      *  \param[in] rows number of rows.
      *  \param[in] func predicate deciding whether its first argument should
      *             replace the current record (its second argument).
      */
     template <typename T>
     void cpuReduce (T *in, T *out, uint32_t cols, uint32_t rows, std::function<bool (T, T)> func)
     {
         // An empty row has no first element to seed the reduction with.
         if (cols == 0)
             return;

         for (uint32_t r = 0; r < rows; ++r)
         {
             // size_t offset: `r * cols` must not wrap for large arrays.
             const T *row = in + static_cast<std::size_t> (r) * cols;

             T rec = row[0];

             for (uint32_t c = 1; c < cols; ++c)
             {
                 const T candidate = row[c];
                 if (func (candidate, rec))
                     rec = candidate;
             }

             out[r] = rec;
         }
     }


     /*! \brief Reduces each row of an array to a single element (sum).
      *  \details The sum is accumulated in the element type `T`. Seeding
      *           `std::accumulate` with a `float` would force single-precision
      *           accumulation and give wrong totals for integer and `double` data.
      *
      *  \param[in] in input array of `cols * rows` elements in row-major order.
      *  \param[out] out output array with one element (the row sum) per row.
      *  \param[in] cols number of columns.
      *  \param[in] rows number of rows.
      */
     template <typename T>
     void cpuReduceSum (T *in, T *out, uint32_t cols, uint32_t rows)
     {
         for (uint32_t r = 0; r < rows; ++r)
         {
             // size_t offset: `r * cols` must not wrap for large arrays.
             const T *row = in + static_cast<std::size_t> (r) * cols;

             out[r] = std::accumulate (row, row + cols, T (0));
         }
     }


     /*! \brief Performs an inclusive scan (prefix sum) along each row of an array.
      *  \details Element `col` of an output row holds the sum of the input row
      *           elements `0..col`.
      *
      *  \param[in] in input array of `width * height` elements in row-major order.
      *  \param[out] out output array of `width * height` elements.
      *  \param[in] width number of elements per row. If it is 0 there is nothing
      *             to scan and `out` is left untouched.
      *  \param[in] height number of rows.
      */
     template <typename T>
     void cpuInScan (T *in, T *out, uint32_t width, uint32_t height)
     {
         // With empty rows there is no "first element" to initialize.
         if (width == 0)
             return;

         for (uint32_t row = 0; row < height; ++row)
         {
             // size_t offset: `row * width` must not wrap for large arrays.
             const std::size_t base = static_cast<std::size_t> (row) * width;

             // The first element of the row seeds the scan
             out[base] = in[base];

             // Every other element adds its input to the previous output
             for (uint32_t col = 1; col < width; ++col)
                 out[base + col] = out[base + col - 1] + in[base + col];
         }
     }


     /*! \brief Performs an exclusive scan (prefix sum) along each row of an array.
      *  \details Element `col` of an output row holds the sum of the input row
      *           elements `0..col-1`; the first element of every row is 0.
      *
      *  \param[in] in input array of `width * height` elements in row-major order.
      *  \param[out] out output array of `width * height` elements.
      *  \param[in] width number of elements per row. If it is 0 there is nothing
      *             to scan and `out` is left untouched.
      *  \param[in] height number of rows.
      */
     template <typename T>
     void cpuExScan (T *in, T *out, uint32_t width, uint32_t height)
     {
         // With empty rows there is no "first element" to initialize.
         if (width == 0)
             return;

         for (uint32_t row = 0; row < height; ++row)
         {
             // size_t offset: `row * width` must not wrap for large arrays.
             const std::size_t base = static_cast<std::size_t> (row) * width;

             // The first element of the row is the identity of the sum
             out[base] = 0;

             // Every other element adds the previous input to the previous output
             for (uint32_t col = 1; col < width; ++col)
                 out[base + col] = out[base + col - 1] + in[base + col - 1];
         }
     }


     /*! \brief Samples a point cloud for 16384 (128x128) landmarks.
      *  \details Points are 8 elements wide and an input row holds 640 points.
      *           Landmark `(lx, ly)` is read from input row `48 + 3 * ly + 1` and
      *           input column `64 + 4 * lx + 1`, and written to the output in
      *           row-major order.
      *
      *  \param[in] in input point cloud; it has to cover at least input row 430
      *             (assumes a 640x480 cloud - TODO confirm).
      *  \param[out] out output array of `128 * 128 * 8` elements.
      */
     template <typename T>
     void cpuICPLMs (T *in, T *out)
     {
         for (uint32_t ly = 0; ly < 128; ++ly)
         {
             // Source row: skip a 48-row margin, then pick the middle row of each 3-row cell
             const uint32_t srcRow = 48 + (ly * 3 + 1);

             T *dstRow = out + ly * (128 * 8);

             for (uint32_t lx = 0; lx < 128; ++lx)
             {
                 // Source column: skip a 64-point margin, then pick the second point of each 4-point cell
                 const uint32_t srcCol = 64 + (lx * 4 + 1);

                 const T *src = in + srcRow * (640 * 8) + srcCol * 8;
                 T *dst = dstRow + lx * 8;

                 for (uint32_t k = 0; k < 8; ++k)
                     dst[k] = src[k];
             }
         }
     }


     /*! \brief Samples a set of 16384 (128x128) landmarks for representatives.
      *  \details With `p = floor (log2 (nr))`, the representatives form a grid of
      *           `2^(p - p/2)` columns by `2^(p/2)` rows. The 128x128 landmark
      *           grid is split into equal cells and one landmark near the center
      *           of every cell is copied (all 8 elements). When `nr` is not a
      *           power of 2, only `2^p` representatives are written.
      *
      *  \param[in] in array of `128 * 128 * 8` elements (the landmarks).
      *  \param[out] out array of at least `2^p * 8` elements (the representatives).
      *  \param[in] nr requested number of representatives. The call does nothing
      *             when `nr` is 0 or when the grid would exceed 128 columns.
      */
     template <typename T>
     void cpuICPReps (T *in, T *out, uint32_t nr)
     {
         // log2 (0) is undefined, and there is nothing to sample anyway.
         if (nr == 0)
             return;

         // p = floor (log2 (nr)), computed with integers only.
         uint32_t p = 0;
         for (uint32_t v = nr; v > 1; v >>= 1)
             ++p;

         const uint32_t nrx = uint32_t (1) << (p - p / 2);
         const uint32_t nry = uint32_t (1) << (p / 2);

         // nry <= nrx always holds. More than 128 columns would make the step 0
         // and send every read out of bounds.
         if (nrx > 128)
             return;

         const uint32_t stepX = 128 / nrx;
         const uint32_t stepY = 128 / nry;

         // Offset of the sampled landmark inside its cell. A 1-wide cell has a
         // single landmark at offset 0; `(1 >> 1) - 1` would underflow instead.
         const uint32_t offX = (stepX > 1) ? (stepX >> 1) - 1 : 0;
         const uint32_t offY = (stepY > 1) ? (stepY >> 1) - 1 : 0;

         for (uint32_t gY = 0; gY < nry; ++gY)
         {
             const uint32_t yi = gY * stepY + offY;

             for (uint32_t gX = 0; gX < nrx * 8; gX += 8)
             {
                 const uint32_t xi = gX * stepX + offX * 8;

                 for (uint32_t k = 0; k < 8; ++k)
                     out[gY * (nrx * 8) + gX + k] = in[yi * (128 * 8) + xi + k];
             }
         }
     }


     /*! \brief Computes weights for pairs of points in the fixed and moving sets,
      *         and also reduces them to get their sum.
      *  \details Each weight is `100 / (100 + dist)`, where `dist` is the `dist`
      *           member of the corresponding `D` entry. The sum is accumulated in
      *           double precision from the stored weights.
      *
      *  \param[in] D array of `n` distance records.
      *  \param[out] W array of `n` weights.
      *  \param[out] SW the sum of the weights.
      *  \param[in] n number of pairs.
      */
     template <typename T>
     void cpuICPWeights (rbc_dist_id *D, T *W, cl_double *SW, uint32_t n)
     {
         double total = 0.0;

         for (uint32_t idx = 0; idx < n; ++idx)
         {
             W[idx] = 100.f / (100.f + D[idx].dist);

             // Sum the value as stored (already converted to T)
             total = total + W[idx];
         }

         *SW = total;
     }


     /*! \brief Computes the mean on the xyz dimensions of the set of 8-D points.
      *  \details Every coordinate is divided by `n` before it is accumulated.
      *
      *  \param[in] F array of `n` 8-D points (fixed set).
      *  \param[in] M array of `n` 8-D points (moving set).
      *  \param[out] mean array of 8 elements. Elements 0-2 receive the mean of
      *              `F`, elements 4-6 the mean of `M`; elements 3 and 7 are set to 0.
      *  \param[in] n number of points in each set.
      */
     template <typename T>
     void cpuICPMean (T *F, T *M, T *mean, uint32_t n)
     {
         for (uint32_t k = 0; k < 8; ++k)
             mean[k] = 0.0;

         const T count = (T) n;

         for (uint32_t pt = 0; pt < n; ++pt)
         {
             const T *f = F + pt * 8;
             const T *m = M + pt * 8;

             for (uint32_t axis = 0; axis < 3; ++axis)
             {
                 mean[axis]     += f[axis] / count;
                 mean[4 + axis] += m[axis] / count;
             }
         }
     }


     template <typename T>

     void cpuICPMeanWeighted (T *F, T *M, T *MEAN, T *W, uint32_t n)

     {

         MEAN[0] = MEAN[1] = MEAN[2] = MEAN[3] = 0.0;

         MEAN[4] = MEAN[5] = MEAN[6] = MEAN[7] = 0.0;


         cl_double sum_w = std::accumulate (W, W + n, 0.0);


         for (uint32_t j = 0; j < n; ++j)

         {

             T w = W[j] / sum_w;


             MEAN[0] += w * F[j * 8];

             MEAN[1] += w * F[j * 8 + 1];

             MEAN[2] += w * F[j * 8 + 2];


             MEAN[4] += w * M[j * 8];

             MEAN[5] += w * M[j * 8 + 1];

             MEAN[6] += w * M[j * 8 + 2];

         }

     }


     /*! \brief Computes the deviations of a set of points from their mean.
      *  \details The first 4 elements of every 8-D point are processed; the
      *           deviations are stored as 4-D points.
      *
      *  \param[in] F array of `n` 8-D points (fixed set).
      *  \param[in] M array of `n` 8-D points (moving set).
      *  \param[out] DF array of `n` 4-D deviations of `F` from `mean[0..3]`.
      *  \param[out] DM array of `n` 4-D deviations of `M` from `mean[4..7]`.
      *  \param[in] mean array of 8 elements holding the two means.
      *  \param[in] n number of points in each set.
      */
     template <typename T>
     void cpuICPDevs (T *F, T *M, T *DF, T *DM, T *mean, uint32_t n)
     {
         const T *meanF = mean;
         const T *meanM = mean + 4;

         for (uint32_t pt = 0; pt < n; ++pt)
         {
             const T *f = F + pt * 8;
             const T *m = M + pt * 8;
             T *df = DF + pt * 4;
             T *dm = DM + pt * 4;

             for (uint32_t k = 0; k < 4; ++k)
             {
                 df[k] = f[k] - meanF[k];
                 dm[k] = m[k] - meanM[k];
             }
         }
     }


     /*! \brief Calculates the S matrix and the constituents of the scale factor s.
      *  \details `S[0..8]` is the row-major 3x3 matrix of sums of products
      *           `mp[r] * fp[c]`, `S[9]` is the sum of the squared norms of the
      *           `mp` vectors and `S[10]` that of the `fp` vectors, where `mp` and
      *           `fp` are the xyz parts of the deviations scaled by `c`.
      *
      *  \param[in] DM array of `m` 4-D deviations (moving set).
      *  \param[in] DF array of `m` 4-D deviations (fixed set).
      *  \param[out] S array of 11 elements.
      *  \param[in] m number of points.
      *  \param[in] c factor applied to every deviation coordinate.
      */
     template <typename T>
     void cpuICPS (T *DM, T *DF, T *S, uint32_t m, float c)
     {
         std::fill (S, S + 11, 0.f);

         for (uint32_t pt = 0; pt < m; ++pt)
         {
             const T *dm = DM + pt * 4;
             const T *df = DF + pt * 4;

             const T mp[3] = { c * dm[0], c * dm[1], c * dm[2] };
             const T fp[3] = { c * df[0], c * df[1], c * df[2] };

             // Sums of products, row-major: rows follow mp, columns follow fp
             for (uint32_t r = 0; r < 3; ++r)
                 for (uint32_t q = 0; q < 3; ++q)
                     S[3 * r + q] += mp[r] * fp[q];

             // Sums of squared norms
             S[9]  += mp[0] * mp[0] + mp[1] * mp[1] + mp[2] * mp[2];
             S[10] += fp[0] * fp[0] + fp[1] * fp[1] + fp[2] * fp[2];
         }
     }


     /*! \brief Calculates the weighted S matrix and the constituents of the scale factor s.
      *  \details Same layout as the unweighted variant: `S[0..8]` is the row-major
      *           3x3 matrix of sums of `w * mp[r] * fp[c]`, `S[9]` is the weighted
      *           sum of the squared norms of the `mp` vectors and `S[10]` that of
      *           the `fp` vectors. `mp` and `fp` are the xyz parts of the input
      *           points scaled by `c`.
      *
      *  \param[in] M array of `m` 4-D points (moving set).
      *  \param[in] F array of `m` 4-D points (fixed set).
      *  \param[in] W array of `m` weights.
      *  \param[out] S array of 11 elements.
      *  \param[in] m number of points.
      *  \param[in] c factor applied to every point coordinate.
      */
     template <typename T>
     void cpuICPSw (T *M, T *F, T *W, T *S, uint32_t m, float c)
     {
         std::fill (S, S + 11, 0.f);

         for (uint32_t pt = 0; pt < m; ++pt)
         {
             const T *mPt = M + pt * 4;
             const T *fPt = F + pt * 4;

             const T mp[3] = { c * mPt[0], c * mPt[1], c * mPt[2] };
             const T fp[3] = { c * fPt[0], c * fPt[1], c * fPt[2] };
             const T w = W[pt];

             // Weighted sums of products, row-major: rows follow mp, columns follow fp
             for (uint32_t r = 0; r < 3; ++r)
                 for (uint32_t q = 0; q < 3; ++q)
                     S[3 * r + q] += w * mp[r] * fp[q];

             // Weighted sums of squared norms
             S[9]  += w * (mp[0] * mp[0] + mp[1] * mp[1] + mp[2] * mp[2]);
             S[10] += w * (fp[0] * fp[0] + fp[1] * fp[1] + fp[2] * fp[2]);
         }
     }


     /*! \brief Performs a cross product, `c = a x b`.
      *  \details The result is computed into temporaries before it is stored, so
      *           `c` may alias `a` or `b`.
      *
      *  \param[in] a first operand (3 elements are read).
      *  \param[in] b second operand (3 elements are read).
      *  \param[out] c result (3 elements are written).
      */
     template <typename T>
     void cross_product (T *a, T *b, T *c)
     {
         // Writing c[0] directly would corrupt the remaining components when
         // the output shares storage with one of the inputs.
         const T c0 = (a[1] * b[2]) - (a[2] * b[1]);
         const T c1 = (a[2] * b[0]) - (a[0] * b[2]);
         const T c2 = (a[0] * b[1]) - (a[1] * b[0]);

         c[0] = c0;
         c[1] = c1;
         c[2] = c2;
     }


     /*! \brief Performs a homogeneous transformation on a set of points using a
      *         quaternion and a translation vector.
      *  \details Every point `p` (the xyz part of an 8-D point) is mapped to
      *           `s * (p + cross (2 * q.xyz, cross (q.xyz, p) + q.w * p)) + t`.
      *           Elements 3-7 of each point are copied through unchanged.
      *
      *  \param[in] M array of `m` 8-D points.
      *  \param[out] tM array of `m` transformed 8-D points.
      *  \param[in] D array of 8 elements: the quaternion (x, y, z, w) in `D[0..3]`,
      *             the translation in `D[4..6]` and the scale in `D[7]`.
      *  \param[in] m number of points.
      */
     template <typename T>
     void cpuICPTransformQ (T *M, T *tM, T *D, uint32_t m)
     {
         // Unpack the transformation
         T q[4] = { D[0], D[1], D[2], D[3] };
         T t[3] = { D[4], D[5], D[6] };
         T s = D[7];

         // 2 * q.xyz is the same for every point
         T q2[3] = { 2 * q[0], 2 * q[1], 2 * q[2] };

         for (uint32_t pt = 0; pt < m; ++pt)
         {
             T *src = M + pt * 8;
             T *dst = tM + pt * 8;

             T p[3] = { src[0], src[1], src[2] };

             // inner = cross (q.xyz, p) + q.w * p
             T inner[3];
             cross_product (q, p, inner);
             for (uint32_t k = 0; k < 3; ++k)
                 inner[k] = inner[k] + q[3] * p[k];

             // outer = cross (2 * q.xyz, inner)
             T outer[3];
             cross_product (q2, inner, outer);

             for (uint32_t k = 0; k < 3; ++k)
                 dst[k] = s * (p[k] + outer[k]) + t[k];

             // The remaining elements are carried over as they are
             for (uint32_t k = 3; k < 8; ++k)
                 dst[k] = src[k];
         }
     }


     /*! \brief Performs a homogeneous transformation on a set of points using a
      *         quaternion and a translation vector.
      *  \details The rotation is applied as two matrix-vector products with
      *           matrices built from the quaternion; the result is then scaled and
      *           translated. Elements 3-7 of each point are copied through
      *           unchanged. All sums are accumulated in `T`, so `double` input is
      *           not truncated to single precision.
      *
      *  \param[in] M array of `m` 8-D points.
      *  \param[out] tM array of `m` transformed 8-D points.
      *  \param[in] D array of 8 elements: the quaternion (x, y, z, w) in `D[0..3]`,
      *             the translation in `D[4..6]` and the scale in `D[7]`.
      *  \param[in] m number of points.
      */
     template <typename T>
     void cpuICPTransformQ2 (T *M, T *tM, T *D, uint32_t m)
     {
         T q[4] = { D[0], D[1], D[2], D[3] };
         T t[3] = { D[4], D[5], D[6] };
         T s = D[7];

         // Seed for std::inner_product. Its type selects the accumulator type,
         // so it has to be T rather than a float literal.
         const T zero = T (0);

         T Q[4][4] = { {  q[3], -q[2],  q[1], q[0] },
                       {  q[2],  q[3], -q[0], q[1] },
                       { -q[1],  q[0],  q[3], q[2] },
                       { -q[0], -q[1], -q[2], q[3] } };

         T Q_[3][4] = { {  q[3], -q[2],  q[1], -q[0] },
                        {  q[2],  q[3], -q[0], -q[1] },
                        { -q[1],  q[0],  q[3], -q[2] } };

         for (uint32_t i = 0; i < m; ++i)
         {
             // The point as a 4-vector with a zero last component
             T p[4] = { M[i * 8], M[i * 8 + 1], M[i * 8 + 2], zero };

             // p_ = Q * p
             T p_[4];
             for (uint32_t r = 0; r < 4; ++r)
                 p_[r] = std::inner_product (Q[r], Q[r] + 4, p, zero);

             // tp = s * (Q_ * p_) + t
             T tp[3];
             for (uint32_t r = 0; r < 3; ++r)
                 tp[r] = s * std::inner_product (Q_[r], Q_[r] + 4, p_, zero) + t[r];

             tM[i * 8]     = tp[0];
             tM[i * 8 + 1] = tp[1];
             tM[i * 8 + 2] = tp[2];

             // The remaining elements are carried over as they are
             for (uint32_t k = 3; k < 8; ++k)
                 tM[i * 8 + k] = M[i * 8 + k];
         }
     }


     /*! \brief Performs a homogeneous transformation on a set of points using a
      *         transformation matrix.
      *  \details The first 4 elements of every 8-D point are multiplied by the
      *           3x4 row-major matrix `D`; the products become elements 0-2 of the
      *           transformed point. Elements 3-7 are copied through unchanged. The
      *           point is copied before any output is written, so the
      *           transformation can be done in place (`tM == M`). Sums are
      *           accumulated in `T`.
      *
      *  \param[in] M array of `m` 8-D points.
      *  \param[out] tM array of `m` transformed 8-D points.
      *  \param[in] D array of 12 elements, the first 3 rows of the transformation
      *             matrix in row-major order.
      *  \param[in] m number of points.
      */
     template <typename T>
     void cpuICPTransformM (T *M, T *tM, T *D, uint32_t m)
     {
         for (uint32_t i = 0; i < m; ++i)
         {
             const T *src = M + i * 8;
             T *dst = tM + i * 8;

             // Snapshot of the operand: with tM == M, writing dst[0] would
             // otherwise change the input of the next two rows.
             const T p[4] = { src[0], src[1], src[2], src[3] };

             // T (0) as the seed keeps the accumulation in T
             for (uint32_t r = 0; r < 3; ++r)
                 dst[r] = std::inner_product (D + 4 * r, D + 4 * r + 4, p, T (0));

             // The remaining elements are carried over as they are
             for (uint32_t k = 3; k < 8; ++k)
                 dst[k] = src[k];
         }
     }


     /*! \brief Computes the vector length (Euclidean norm) of a 4-vector.
      *
      *  \param[in] x vector of 4 elements.
      *  \return The square root of the sum of the squared elements.
      */
     template <typename T>
     T cpuLength (T *x)
     {
         T squares = 0.f;

         for (uint32_t k = 0; k < 4; ++k)
             squares += x[k] * x[k];

         return std::sqrt (squares);
     }


     /*! \brief Computes the distance (Euclidean norm of the difference) between
      *         two 4-vectors.
      *
      *  \param[in] x1 first vector of 4 elements.
      *  \param[in] x2 second vector of 4 elements.
      *  \return The square root of the sum of the squared element differences.
      */
     template <typename T>
     T cpuDistance (T *x1, T *x2)
     {
         T squares = 0.f;

         for (uint32_t k = 0; k < 4; ++k)
             squares += std::pow (x1[k] - x2[k], 2);

         return std::sqrt (squares);
     }


     /*! \brief Normalizes a 4-vector in place.
      *  \details Every element is divided by the vector length.
      *  \note A vector of length 0 leads to a division by zero.
      *
      *  \param[in,out] x vector of 4 elements.
      */
     template <typename T>
     void cpuNormalize (T *x)
     {
         const T norm = cpuLength (x);

         for (T *elem = x; elem != x + 4; ++elem)
             *elem /= norm;
     }


     /*! \brief Computes a matrix-vector product, `x_new = N * x`.
      *  \details Every row sum is accumulated in `T`; a `float` seed for
      *           `std::inner_product` would truncate `double` data to single
      *           precision.
      *  \note `x_new` must not alias `x`.
      *
      *  \param[in] N 4x4 matrix of 16 elements in row-major order.
      *  \param[in] x vector of 4 elements.
      *  \param[out] x_new vector of 4 elements receiving the product.
      */
     template <typename T>
     void cpuProd (T *N, T *x, T *x_new)
     {
         for (uint32_t r = 0; r < 4; ++r)
             x_new[r] = std::inner_product (N + 4 * r, N + 4 * r + 4, x, T (0));
     }


     /*! \brief Estimates a rotation quaternion, a translation vector and a scale
      *         factor from the S matrix and the means of two point sets.
      *  \details A symmetric 4x4 matrix `N` is assembled from `Sij[0..8]` and its
      *           dominant eigenvector is found with the power method. Whenever the
      *           eigenvalue estimate is negative, the diagonal of `N` is shifted
      *           by that estimate and the iteration starts over. The normalized
      *           eigenvector is used as the quaternion `qk` (x, y, z, w); the
      *           scale is `sk = sqrt (Sij[9] / Sij[10])`; the translation is
      *           `tk = mf - sk * R (mm)`, with `R` the rotation described by `qk`.
      *
      *  \param[in] Sij array of 11 elements: the 3x3 S matrix (row-major) followed
      *             by the two constituents of the scale factor.
      *  \param[in] means array of 8 elements: `means[0..2]` is read as `mf` and
      *             `means[4..6]` as `mm`.
      *  \param[out] Tk array of 8 elements: the quaternion in `Tk[0..3]`, the
      *              translation in `Tk[4..6]` and the scale in `Tk[7]`.
      *  \return The scale factor `sk` (also stored in `Tk[7]`).
      *          NOTE(review): the function used to fall off its end without
      *          returning a value (undefined behavior); confirm that the scale is
      *          the value callers are meant to receive.
      */
     template <typename T>
     T cpuICPPowerMethod (T *Sij, T *means, T *Tk)
     {
         const T Sxx = Sij[0];
         const T Sxy = Sij[1];
         const T Sxz = Sij[2];
         const T Syx = Sij[3];
         const T Syy = Sij[4];
         const T Syz = Sij[5];
         const T Szx = Sij[6];
         const T Szy = Sij[7];
         const T Szz = Sij[8];

         const T sk = std::sqrt (Sij[9] / Sij[10]);

         T N[16] =
         {
             Sxx - Syy - Szz,         Sxy + Syx,         Szx + Sxz,       Syz - Szy,
                   Sxy + Syx, - Sxx + Syy - Szz,         Syz + Szy,       Szx - Sxz,
                   Szx + Sxz,         Syz + Szy, - Sxx - Syy + Szz,       Sxy - Syx,
                   Syz - Szy,         Szx - Sxz,         Sxy - Syx, Sxx + Syy + Szz
         };

         // Power Method ============================================================

         T x[4] = { 1.f, 1.f, 1.f, 1.f };
         T x_new[4];

         // Parameters
         const uint32_t maxIter = 1000;

         while (true)
         {
             // Distances are never negative, so -1 cannot match the first
             // measured error. It replaces a read of an uninitialized variable
             // and is reset for every restart of the iteration.
             T error_new = -1;

             for (uint32_t iter = 0; iter < maxIter; ++iter)
             {
                 cpuProd (N, x, x_new);

                 cpuNormalize (x_new);

                 // Stop as soon as the step size no longer changes
                 const T error = error_new;
                 error_new = cpuDistance (x, x_new);
                 if (error_new == error) break;

                 std::copy (x_new, x_new + 4, x);
             }

             // Eigenvalue estimate from the first row: (N * x)[0] / x[0].
             // T (0) as the seed keeps the accumulation in T.
             const T lambda = std::inner_product (N, N + 4, x_new, T (0)) / x_new[0];

             // Written as a negated `<` so that a NaN estimate also leaves the loop
             if (!(lambda < 0))
                 break;

             // Shift the spectrum by the negative eigenvalue and start over
             N[0] -= lambda;
             N[5] -= lambda;
             N[10] -= lambda;
             N[15] -= lambda;

             x[0] = x[1] = x[2] = x[3] = 1.f;
         }

         // One more iteration starting from the latest estimate
         std::copy (x_new, x_new + 4, x);
         cpuProd (N, x, x_new);
         cpuNormalize (x_new);

         // =========================================================================

         T qk[4] = { x_new[0], x_new[1], x_new[2], x_new[3] };

         T mf[3] = { means[0], means[1], means[2] };
         T mm[3] = { means[4], means[5], means[6] };

         // tk = mf - sk * (mm + cross (2 * qk.xyz, cross (qk.xyz, mm) + qk.w * mm))
         T qk_2[3] = { 2 * qk[0], 2 * qk[1], 2 * qk[2] };
         T cp1[3]; cross_product (qk, mm, cp1);
         T mmw[3] = { qk[3] * mm[0], qk[3] * mm[1], qk[3] * mm[2] };
         T tmp1[3] = { cp1[0] + mmw[0], cp1[1] + mmw[1], cp1[2] + mmw[2] };
         T cp2[3]; cross_product (qk_2, tmp1, cp2);
         T tmp2[3] = { sk * (mm[0] + cp2[0]), sk * (mm[1] + cp2[1]), sk * (mm[2] + cp2[2]) };
         T tk[4] = { mf[0] - tmp2[0], mf[1] - tmp2[1], mf[2] - tmp2[2], sk };

         std::copy (qk, qk + 4, Tk);
         std::copy (tk, tk + 4, Tk + 4);

         return sk;
     }


 }


 #endif  // ICP_HELPERFUNCS_HPP

ICP::cpuICPTransformM
void cpuICPTransformM(T *M, T *tM, T *D, uint32_t m)
Performs a homogeneous transformation on a set of points using a transformation matrix.
Definition: helper_funcs.hpp:575

ICP
Offers functions that are serial CPU implementations of the relevant algorithms in the ICP pipeline...
Definition: helper_funcs.hpp:47

ICP::cpuICPSw
void cpuICPSw(T *M, T *F, T *W, T *S, uint32_t m, float c)
Calculates the S matrix and the constituents of the scale factor s.
Definition: helper_funcs.hpp:424

ICP::cpuICPLMs
void cpuICPLMs(T *in, T *out)
Samples a point cloud for 16384 (128x128) landmarks.
Definition: helper_funcs.hpp:220

ICP::cpuExScan
void cpuExScan(T *in, T *out, uint32_t width, uint32_t height)
Performs an exclusive scan operation on the columns of an array.
Definition: helper_funcs.hpp:200

ICP::cpuICPMean
void cpuICPMean(T *F, T *M, T *mean, uint32_t n)
Computes the mean on the xyz dimensions of the set of 8-D points.
Definition: helper_funcs.hpp:300

ICP::printBufferF
void printBufferF(const char *title, T *ptr, uint32_t width, uint32_t height, uint32_t prec)
Prints an array of floating-point type to standard output.
Definition: helper_funcs.hpp:107

ICP::cpuICPWeights
void cpuICPWeights(rbc_dist_id *D, T *W, cl_double *SW, uint32_t n)
Computes weights for pairs of points in the fixed and moving sets, and also reduces them to get their...
Definition: helper_funcs.hpp:281

ICP::cpuReduce
void cpuReduce(T *in, T *out, uint32_t cols, uint32_t rows, std::function< bool(T, T)> func)
Reduces each row of an array to a single element.
Definition: helper_funcs.hpp:137

ICP::cpuProd
void cpuProd(T *N, T *x, T *x_new)
Computes a matrix-vector product, $x_{new} = N x$.
Definition: helper_funcs.hpp:657

ICP::cpuICPPowerMethod
T cpuICPPowerMethod(T *Sij, T *means, T *Tk)
Computes the quantities that represent the incremental development in the transformation estimation i...
Definition: helper_funcs.hpp:682

ICP::cpuICPTransformQ2
void cpuICPTransformQ2(T *M, T *tM, T *D, uint32_t m)
Performs a homogeneous transformation on a set of points using a quaternion and a translation vector...
Definition: helper_funcs.hpp:523

ICP::cpuInScan
void cpuInScan(T *in, T *out, uint32_t width, uint32_t height)
Performs an inclusive scan operation on the columns of an array.
Definition: helper_funcs.hpp:178

ICP::setProfilingFlag
bool setProfilingFlag(int argc, char **argv)
Checks the command line arguments for the profiling flag, --profiling.
Definition: helper_funcs.cpp:66

ICP::cpuDistance
T cpuDistance(T *x1, T *x2)
Computes the vector distance ($\ell_2$ norm).
Definition: helper_funcs.hpp:619

ICP::cpuICPMeanWeighted
void cpuICPMeanWeighted(T *F, T *M, T *MEAN, T *W, uint32_t n)
Computes the weighted mean on the xyz dimensions of the set of 8-D points.
Definition: helper_funcs.hpp:329

ICP::cpuReduceSum
void cpuReduceSum(T *in, T *out, uint32_t cols, uint32_t rows)
Reduces each row of an array to a single element (sum).
Definition: helper_funcs.hpp:161

ICP::cross_product
void cross_product(T *a, T *b, T *c)
Performs a cross product.
Definition: helper_funcs.hpp:459

ICP::cpuICPS
void cpuICPS(T *DM, T *DF, T *S, uint32_t m, float c)
Calculates the S matrix and the constituents of the scale factor s.
Definition: helper_funcs.hpp:387

ICP::cpuICPDevs
void cpuICPDevs(T *F, T *M, T *DF, T *DM, T *mean, uint32_t n)
Computes the deviations of a set of points from their mean.
Definition: helper_funcs.hpp:363

ICP::printBuffer
void printBuffer(const char *title, T *ptr, uint32_t width, uint32_t height)
Prints an array of an integer type to standard output.
Definition: helper_funcs.hpp:80

ICP::cpuLength
T cpuLength(T *x)
Computes the vector length ($\ell_2$ norm).
Definition: helper_funcs.hpp:598

ICP::cpuICPTransformQ
void cpuICPTransformQ(T *M, T *tM, T *D, uint32_t m)
Performs a homogeneous transformation on a set of points using a quaternion and a translation vector...
Definition: helper_funcs.hpp:478

ICP::cpuNormalize
void cpuNormalize(T *x)
Normalizes a vector.
Definition: helper_funcs.hpp:638

ICP::nextPow2
uint64_t nextPow2(T num)
Returns the first power of 2 greater than or equal to the input.
Definition: helper_funcs.hpp:60

ICP::cpuICPReps
void cpuICPReps(T *in, T *out, uint32_t nr)
Samples a set of 16384 (128x128) landmarks for representatives.
Definition: helper_funcs.hpp:246