29 #ifndef ICP_HELPERFUNCS_HPP
30 #define ICP_HELPERFUNCS_HPP
35 #include <RBC/data_types.hpp>
37 #if defined(__APPLE__) || defined(__MACOSX)
38 #include <OpenCL/cl.hpp>
65 for (pow = 1; pow < (uint64_t) num; pow <<= 1) ;
80 void printBuffer (
const char *title, T *ptr, uint32_t width, uint32_t height)
82 std::cout << title << std::endl;
84 for (
int row = 0; row < height; ++row)
86 for (
int col = 0; col < width; ++col)
88 std::cout << std::setw (3 *
sizeof (T)) << +ptr[row * width + col] <<
" ";
90 std::cout << std::endl;
93 std::cout << std::endl;
106 template <
typename T>
107 void printBufferF (
const char *title, T *ptr, uint32_t width, uint32_t height, uint32_t prec)
109 std::ios::fmtflags f (std::cout.flags ());
110 std::cout << title << std::endl;
111 std::cout << std::fixed << std::setprecision (prec);
113 for (
int row = 0; row < height; ++row)
115 for (
int col = 0; col < width; ++col)
117 std::cout << std::setw (5 + prec) << ptr[row * width + col] <<
" ";
119 std::cout << std::endl;
122 std::cout << std::endl;
136 template <
typename T>
137 void cpuReduce (T *in, T *out, uint32_t cols, uint32_t rows, std::function<
bool (T, T)> func)
139 for (uint r = 0; r < rows; ++r)
141 T rec = in[r * cols];
142 for (uint c = 1; c < cols; ++c)
144 T tmp = in[r * cols + c];
145 if (func (tmp, rec)) rec = tmp;
160 template <
typename T>
163 for (uint r = 0; r < rows; ++r)
164 out[r] = std::accumulate (&in[r * cols], &in[r * cols] + cols, 0.f);
177 template <
typename T>
178 void cpuInScan (T *in, T *out, uint32_t width, uint32_t height)
181 for (uint32_t row = 0; row < height; ++row)
182 out[row * width] = in[row * width];
184 for (uint32_t row = 0; row < height; ++row)
185 for (uint32_t col = 1; col < width; ++col)
186 out[row * width + col] = out[row * width + col - 1] + in[row * width + col];
199 template <
typename T>
200 void cpuExScan (T *in, T *out, uint32_t width, uint32_t height)
203 for (uint32_t row = 0; row < height; ++row)
204 out[row * width] = 0;
206 for (uint32_t row = 0; row < height; ++row)
207 for (uint32_t col = 1; col < width; ++col)
208 out[row * width + col] = out[row * width + col - 1] + in[row * width + col - 1];
219 template <
typename T>
222 for (uint32_t gY = 0; gY < 128; ++gY)
224 uint32_t yi = gY * 3 + 1;
226 for (uint32_t gX = 0; gX < 128 * 8; gX += 8)
228 uint32_t xi = gX * 4 + 1 * 8;
230 for (uint32_t k = 0; k < 8; ++k)
231 out[gY * (128 * 8) + gX + k] = in[(48 + yi) * (640 * 8) + ((64 * 8) + xi) + k];
245 template <
typename T>
248 int p = std::log2 (nr);
249 uint32_t nrx = std::pow (2, p - p / 2);
250 uint32_t nry = std::pow (2, p / 2);
252 uint stepX = 128 / nrx;
253 uint stepY = 128 / nry;
255 for (uint32_t gY = 0; gY < nry; ++gY)
257 uint32_t yi = gY * stepY + (stepY >> 1) - 1;
259 for (uint32_t gX = 0; gX < nrx * 8; gX += 8)
261 uint32_t xi = gX * stepX + ((stepX >> 1) - 1) * 8;
263 for (uint32_t k = 0; k < 8; ++k)
264 out[gY * (nrx * 8) + gX + k] = in[yi * (128 * 8) + xi + k];
280 template <
typename T>
283 for (uint32_t j = 0; j < n; ++j)
284 W[j] = 100.f / (100.f + D[j].dist);
286 *SW = std::accumulate (W, W + n, 0.0);
299 template <
typename T>
302 mean[0] = mean[1] = mean[2] = mean[3] = 0.0;
303 mean[4] = mean[5] = mean[6] = mean[7] = 0.0;
305 for (uint32_t j = 0; j < n; ++j)
307 mean[0] += F[j * 8] / (T) n;
308 mean[1] += F[j * 8 + 1] / (T) n;
309 mean[2] += F[j * 8 + 2] / (T) n;
311 mean[4] += M[j * 8] / (T) n;
312 mean[5] += M[j * 8 + 1] / (T) n;
313 mean[6] += M[j * 8 + 2] / (T) n;
328 template <
typename T>
331 MEAN[0] = MEAN[1] = MEAN[2] = MEAN[3] = 0.0;
332 MEAN[4] = MEAN[5] = MEAN[6] = MEAN[7] = 0.0;
334 cl_double sum_w = std::accumulate (W, W + n, 0.0);
336 for (uint32_t j = 0; j < n; ++j)
340 MEAN[0] += w * F[j * 8];
341 MEAN[1] += w * F[j * 8 + 1];
342 MEAN[2] += w * F[j * 8 + 2];
344 MEAN[4] += w * M[j * 8];
345 MEAN[5] += w * M[j * 8 + 1];
346 MEAN[6] += w * M[j * 8 + 2];
362 template <
typename T>
363 void cpuICPDevs (T *F, T *M, T *DF, T *DM, T *mean, uint32_t n)
365 for (uint32_t j = 0; j < n; ++j)
367 for (uint32_t k = 0; k < 4; ++k)
369 DF[j * 4 + k] = F[j * 8 + k] - mean[k];
370 DM[j * 4 + k] = M[j * 8 + k] - mean[4 + k];
386 template <
typename T>
387 void cpuICPS (T *DM, T *DF, T *S, uint32_t m,
float c)
389 for (uint32_t j = 0; j < 11; ++j)
392 for (uint32_t i = 0; i < m; ++i)
394 T mp[3] = { c * DM[i * 4], c * DM[i * 4 + 1], c * DM[i * 4 + 2] };
395 T fp[3] = { c * DF[i * 4], c * DF[i * 4 + 1], c * DF[i * 4 + 2] };
397 S[0] += mp[0] * fp[0];
398 S[1] += mp[0] * fp[1];
399 S[2] += mp[0] * fp[2];
400 S[3] += mp[1] * fp[0];
401 S[4] += mp[1] * fp[1];
402 S[5] += mp[1] * fp[2];
403 S[6] += mp[2] * fp[0];
404 S[7] += mp[2] * fp[1];
405 S[8] += mp[2] * fp[2];
406 S[9] += mp[0] * mp[0] + mp[1] * mp[1] + mp[2] * mp[2];
407 S[10] += fp[0] * fp[0] + fp[1] * fp[1] + fp[2] * fp[2];
423 template <
typename T>
424 void cpuICPSw (T *M, T *F, T *W, T *S, uint32_t m,
float c)
426 for (uint32_t j = 0; j < 11; ++j)
429 for (uint32_t i = 0; i < m; ++i)
431 T mp[3] = { c * M[i * 4], c * M[i * 4 + 1], c * M[i * 4 + 2] };
432 T fp[3] = { c * F[i * 4], c * F[i * 4 + 1], c * F[i * 4 + 2] };
435 S[0] += w * mp[0] * fp[0];
436 S[1] += w * mp[0] * fp[1];
437 S[2] += w * mp[0] * fp[2];
438 S[3] += w * mp[1] * fp[0];
439 S[4] += w * mp[1] * fp[1];
440 S[5] += w * mp[1] * fp[2];
441 S[6] += w * mp[2] * fp[0];
442 S[7] += w * mp[2] * fp[1];
443 S[8] += w * mp[2] * fp[2];
444 S[9] += w * (mp[0] * mp[0] + mp[1] * mp[1] + mp[2] * mp[2]);
445 S[10] += w * (fp[0] * fp[0] + fp[1] * fp[1] + fp[2] * fp[2]);
458 template <
typename T>
461 c[0] = (a[1] * b[2]) - (a[2] * b[1]);
462 c[1] = (a[2] * b[0]) - (a[0] * b[2]);
463 c[2] = (a[0] * b[1]) - (a[1] * b[0]);
477 template <
typename T>
480 T q[4] = { D[0], D[1], D[2], D[3] };
481 T t[3] = { D[4], D[5], D[6] };
484 for (uint32_t i = 0; i < m; ++i)
486 T p[3] = { M[i * 8], M[i * 8 + 1], M[i * 8 + 2] };
488 T q2[3] = { 2 * q[0], 2 * q[1], 2 * q[2] };
491 qcp[0] = qcp[0] + q[3] * p[0];
492 qcp[1] = qcp[1] + q[3] * p[1];
493 qcp[2] = qcp[2] + q[3] * p[2];
496 tp[0] = s * (p[0] + q2cqcp[0]) + t[0];
497 tp[1] = s * (p[1] + q2cqcp[1]) + t[1];
498 tp[2] = s * (p[2] + q2cqcp[2]) + t[2];
501 tM[i * 8 + 1] = tp[1];
502 tM[i * 8 + 2] = tp[2];
503 tM[i * 8 + 3] = M[i * 8 + 3];
504 tM[i * 8 + 4] = M[i * 8 + 4];
505 tM[i * 8 + 5] = M[i * 8 + 5];
506 tM[i * 8 + 6] = M[i * 8 + 6];
507 tM[i * 8 + 7] = M[i * 8 + 7];
522 template <
typename T>
525 T q[4] = { D[0], D[1], D[2], D[3] };
526 T t[3] = { D[4], D[5], D[6] };
529 T Q[4][4] = { { q[3], -q[2], q[1], q[0] },
530 { q[2], q[3], -q[0], q[1] },
531 { -q[1], q[0], q[3], q[2] },
532 { -q[0], -q[1], -q[2], q[3] } };
534 T Q_[3][4] = { { q[3], -q[2], q[1], -q[0] },
535 { q[2], q[3], -q[0], -q[1] },
536 { -q[1], q[0], q[3], -q[2] } };
538 for (uint32_t i = 0; i < m; ++i)
540 T p[4] = { M[i * 8], M[i * 8 + 1], M[i * 8 + 2], 0.f };
542 T p_[4] = { std::inner_product (Q[0], Q[0] + 4, p, 0.f),
543 std::inner_product (Q[1], Q[1] + 4, p, 0.f),
544 std::inner_product (Q[2], Q[2] + 4, p, 0.f),
545 std::inner_product (Q[3], Q[3] + 4, p, 0.f) };
548 tp[0] = s * std::inner_product (Q_[0], Q_[0] + 4, p_, 0.f) + t[0];
549 tp[1] = s * std::inner_product (Q_[1], Q_[1] + 4, p_, 0.f) + t[1];
550 tp[2] = s * std::inner_product (Q_[2], Q_[2] + 4, p_, 0.f) + t[2];
553 tM[i * 8 + 1] = tp[1];
554 tM[i * 8 + 2] = tp[2];
555 tM[i * 8 + 3] = M[i * 8 + 3];
556 tM[i * 8 + 4] = M[i * 8 + 4];
557 tM[i * 8 + 5] = M[i * 8 + 5];
558 tM[i * 8 + 6] = M[i * 8 + 6];
559 tM[i * 8 + 7] = M[i * 8 + 7];
574 template <
typename T>
577 for (uint32_t i = 0; i < m; ++i)
579 tM[i * 8] = std::inner_product (&D[0], &D[4], &M[i * 8], 0.f);
580 tM[i * 8 + 1] = std::inner_product (&D[4], &D[8], &M[i * 8], 0.f);
581 tM[i * 8 + 2] = std::inner_product (&D[8], &D[12], &M[i * 8], 0.f);
582 tM[i * 8 + 3] = M[i * 8 + 3];
583 tM[i * 8 + 4] = M[i * 8 + 4];
584 tM[i * 8 + 5] = M[i * 8 + 5];
585 tM[i * 8 + 6] = M[i * 8 + 6];
586 tM[i * 8 + 7] = M[i * 8 + 7];
597 template <
typename T>
607 return std::sqrt (sum);
618 template <
typename T>
623 sum += std::pow (x1[0] - x2[0], 2);
624 sum += std::pow (x1[1] - x2[1], 2);
625 sum += std::pow (x1[2] - x2[2], 2);
626 sum += std::pow (x1[3] - x2[3], 2);
628 return std::sqrt (sum);
637 template <
typename T>
656 template <
typename T>
660 x_new[0] = std::inner_product (N , N + 4, x, 0.f);
661 x_new[1] = std::inner_product (N + 4, N + 8, x, 0.f);
662 x_new[2] = std::inner_product (N + 8, N + 12, x, 0.f);
663 x_new[3] = std::inner_product (N + 12, N + 16, x, 0.f);
681 template <
typename T>
694 T sk = sqrt (Sij[9] / Sij[10]);
698 Sxx - Syy - Szz, Sxy + Syx, Szx + Sxz, Syz - Szy,
699 Sxy + Syx, - Sxx + Syy - Szz, Syz + Szy, Szx - Sxz,
700 Szx + Sxz, Syz + Szy, - Sxx - Syy + Szz, Sxy - Syx,
701 Syz - Szy, Szx - Sxz, Sxy - Syx, Sxx + Syy + Szz
706 T x[4] = { 1.f, 1.f, 1.f, 1.f };
715 for (uint iter = 0; iter < maxIter; ++iter)
722 if ((error_new =
cpuDistance (x, x_new)) == error)
break;
724 std::copy (x_new, x_new + 4, x);
727 T lambda = std::inner_product (N, N + 4, x_new, 0.f) / x_new[0];
736 x[0] = x[1] = x[2] = x[3] = 1.f;
742 std::copy (x_new, x_new + 4, x);
748 T qk[4] = { x_new[0], x_new[1], x_new[2], x_new[3] };
750 T mf[3] = { means[0], means[1], means[2] };
751 T mm[3] = { means[4], means[5], means[6] };
754 T qk_2[3] = { 2 * qk[0], 2 * qk[1], 2 * qk[2] };
756 T mmw[3] = { qk[3] * mm[0], qk[3] * mm[1], qk[3] * mm[2] };
757 T tmp1[3] = { cp1[0] + mmw[0], cp1[1] + mmw[1], cp1[2] + mmw[2] };
759 T tmp2[3] = { sk * (mm[0] + cp2[0]), sk * (mm[1] + cp2[1]), sk * (mm[2] + cp2[2]) };
760 T tk[4] = { mf[0] - tmp2[0], mf[1] - tmp2[1], mf[2] - tmp2[2], sk };
762 std::copy (qk, qk + 4, Tk);
763 std::copy (tk, tk + 4, Tk + 4);
768 #endif // ICP_HELPERFUNCS_HPP
void cpuICPTransformM(T *M, T *tM, T *D, uint32_t m)
Performs a homogeneous transformation on a set of points using a transformation matrix.
Definition: helper_funcs.hpp:575
Offers functions that are serial CPU implementations of the relevant algorithms in the ICP pipeline...
Definition: helper_funcs.hpp:47
void cpuICPSw(T *M, T *F, T *W, T *S, uint32_t m, float c)
Calculates the S matrix and the constituents of the scale factor s.
Definition: helper_funcs.hpp:424
void cpuICPLMs(T *in, T *out)
Samples a point cloud for 16384 (128x128) landmarks.
Definition: helper_funcs.hpp:220
void cpuExScan(T *in, T *out, uint32_t width, uint32_t height)
Performs an exclusive scan operation on the columns of an array.
Definition: helper_funcs.hpp:200
void cpuICPMean(T *F, T *M, T *mean, uint32_t n)
Computes the mean on the xyz dimensions of the set of 8-D points.
Definition: helper_funcs.hpp:300
void printBufferF(const char *title, T *ptr, uint32_t width, uint32_t height, uint32_t prec)
Prints an array of floating-point type to standard output.
Definition: helper_funcs.hpp:107
void cpuICPWeights(rbc_dist_id *D, T *W, cl_double *SW, uint32_t n)
Computes weights for pairs of points in the fixed and moving sets, and also reduces them to get their...
Definition: helper_funcs.hpp:281
void cpuReduce(T *in, T *out, uint32_t cols, uint32_t rows, std::function< bool(T, T)> func)
Reduces each row of an array to a single element.
Definition: helper_funcs.hpp:137
void cpuProd(T *N, T *x, T *x_new)
Computes a matrix-vector product, .
Definition: helper_funcs.hpp:657
T cpuICPPowerMethod(T *Sij, T *means, T *Tk)
Computes the quantities that represent the incremental development in the transformation estimation i...
Definition: helper_funcs.hpp:682
void cpuICPTransformQ2(T *M, T *tM, T *D, uint32_t m)
Performs a homogeneous transformation on a set of points using a quaternion and a translation vector...
Definition: helper_funcs.hpp:523
void cpuInScan(T *in, T *out, uint32_t width, uint32_t height)
Performs an inclusive scan operation on the columns of an array.
Definition: helper_funcs.hpp:178
bool setProfilingFlag(int argc, char **argv)
Checks the command line arguments for the profiling flag, --profiling.
Definition: helper_funcs.cpp:66
T cpuDistance(T *x1, T *x2)
Computes the vector distance ( norm).
Definition: helper_funcs.hpp:619
void cpuICPMeanWeighted(T *F, T *M, T *MEAN, T *W, uint32_t n)
Computes the weighted mean on the xyz dimensions of the set of 8-D points.
Definition: helper_funcs.hpp:329
void cpuReduceSum(T *in, T *out, uint32_t cols, uint32_t rows)
Reduces each row of an array to a single element (sum).
Definition: helper_funcs.hpp:161
void cross_product(T *a, T *b, T *c)
Performs a cross product.
Definition: helper_funcs.hpp:459
void cpuICPS(T *DM, T *DF, T *S, uint32_t m, float c)
Calculates the S matrix and the constituents of the scale factor s.
Definition: helper_funcs.hpp:387
void cpuICPDevs(T *F, T *M, T *DF, T *DM, T *mean, uint32_t n)
Computes the deviations of a set of points from their mean.
Definition: helper_funcs.hpp:363
void printBuffer(const char *title, T *ptr, uint32_t width, uint32_t height)
Prints an array of an integer type to standard output.
Definition: helper_funcs.hpp:80
T cpuLength(T *x)
Computes the vector length ( norm).
Definition: helper_funcs.hpp:598
void cpuICPTransformQ(T *M, T *tM, T *D, uint32_t m)
Performs a homogeneous transformation on a set of points using a quaternion and a translation vector...
Definition: helper_funcs.hpp:478
void cpuNormalize(T *x)
Normalizes a vector.
Definition: helper_funcs.hpp:638
uint64_t nextPow2(T num)
Returns the first power of 2 greater than or equal to the input.
Definition: helper_funcs.hpp:60
void cpuICPReps(T *in, T *out, uint32_t nr)
Samples a set of 16384 (128x128) landmarks for representatives.
Definition: helper_funcs.hpp:246