ICP  1.1.0
 Hosted by GitHub
helper_funcs.hpp
Go to the documentation of this file.
1 
29 #ifndef ICP_HELPERFUNCS_HPP
30 #define ICP_HELPERFUNCS_HPP
31 
32 #include <cassert>
33 #include <algorithm>
34 #include <functional>
35 #include <RBC/data_types.hpp>
36 
37 #if defined(__APPLE__) || defined(__MACOSX)
38 #include <OpenCL/cl.hpp>
39 #else
40 #include <CL/cl.hpp>
41 #endif
42 
43 
47 namespace ICP
48 {
49 
51  bool setProfilingFlag (int argc, char **argv);
52 
53 
59  template <typename T>
60  uint64_t nextPow2 (T num)
61  {
62  assert (num >= 0);
63 
64  uint64_t pow;
65  for (pow = 1; pow < (uint64_t) num; pow <<= 1) ;
66 
67  return pow;
68  }
69 
70 
79  template <typename T>
80  void printBuffer (const char *title, T *ptr, uint32_t width, uint32_t height)
81  {
82  std::cout << title << std::endl;
83 
84  for (int row = 0; row < height; ++row)
85  {
86  for (int col = 0; col < width; ++col)
87  {
88  std::cout << std::setw (3 * sizeof (T)) << +ptr[row * width + col] << " ";
89  }
90  std::cout << std::endl;
91  }
92 
93  std::cout << std::endl;
94  }
95 
96 
106  template <typename T>
107  void printBufferF (const char *title, T *ptr, uint32_t width, uint32_t height, uint32_t prec)
108  {
109  std::ios::fmtflags f (std::cout.flags ());
110  std::cout << title << std::endl;
111  std::cout << std::fixed << std::setprecision (prec);
112 
113  for (int row = 0; row < height; ++row)
114  {
115  for (int col = 0; col < width; ++col)
116  {
117  std::cout << std::setw (5 + prec) << ptr[row * width + col] << " ";
118  }
119  std::cout << std::endl;
120  }
121 
122  std::cout << std::endl;
123  std::cout.flags (f);
124  }
125 
126 
136  template <typename T>
137  void cpuReduce (T *in, T *out, uint32_t cols, uint32_t rows, std::function<bool (T, T)> func)
138  {
139  for (uint r = 0; r < rows; ++r)
140  {
141  T rec = in[r * cols];
142  for (uint c = 1; c < cols; ++c)
143  {
144  T tmp = in[r * cols + c];
145  if (func (tmp, rec)) rec = tmp;
146  }
147  out[r] = rec;
148  }
149  }
150 
151 
160  template <typename T>
161  void cpuReduceSum (T *in, T *out, uint32_t cols, uint32_t rows)
162  {
163  for (uint r = 0; r < rows; ++r)
164  out[r] = std::accumulate (&in[r * cols], &in[r * cols] + cols, 0.f);
165  }
166 
167 
177  template <typename T>
178  void cpuInScan (T *in, T *out, uint32_t width, uint32_t height)
179  {
180  // Initialize the first element of each row
181  for (uint32_t row = 0; row < height; ++row)
182  out[row * width] = in[row * width];
183  // Perform the scan
184  for (uint32_t row = 0; row < height; ++row)
185  for (uint32_t col = 1; col < width; ++col)
186  out[row * width + col] = out[row * width + col - 1] + in[row * width + col];
187  }
188 
189 
199  template <typename T>
200  void cpuExScan (T *in, T *out, uint32_t width, uint32_t height)
201  {
202  // Initialize the first element of each row
203  for (uint32_t row = 0; row < height; ++row)
204  out[row * width] = 0;
205  // Perform the scan
206  for (uint32_t row = 0; row < height; ++row)
207  for (uint32_t col = 1; col < width; ++col)
208  out[row * width + col] = out[row * width + col - 1] + in[row * width + col - 1];
209  }
210 
211 
219  template <typename T>
220  void cpuICPLMs (T *in, T *out)
221  {
222  for (uint32_t gY = 0; gY < 128; ++gY)
223  {
224  uint32_t yi = gY * 3 + 1;
225 
226  for (uint32_t gX = 0; gX < 128 * 8; gX += 8)
227  {
228  uint32_t xi = gX * 4 + 1 * 8;
229 
230  for (uint32_t k = 0; k < 8; ++k)
231  out[gY * (128 * 8) + gX + k] = in[(48 + yi) * (640 * 8) + ((64 * 8) + xi) + k];
232  }
233  }
234  }
235 
236 
245  template <typename T>
246  void cpuICPReps (T *in, T *out, uint32_t nr)
247  {
248  int p = std::log2 (nr);
249  uint32_t nrx = std::pow (2, p - p / 2);
250  uint32_t nry = std::pow (2, p / 2);
251 
252  uint stepX = 128 / nrx;
253  uint stepY = 128 / nry;
254 
255  for (uint32_t gY = 0; gY < nry; ++gY)
256  {
257  uint32_t yi = gY * stepY + (stepY >> 1) - 1;
258 
259  for (uint32_t gX = 0; gX < nrx * 8; gX += 8)
260  {
261  uint32_t xi = gX * stepX + ((stepX >> 1) - 1) * 8;
262 
263  for (uint32_t k = 0; k < 8; ++k)
264  out[gY * (nrx * 8) + gX + k] = in[yi * (128 * 8) + xi + k];
265  }
266  }
267  }
268 
269 
280  template <typename T>
281  void cpuICPWeights (rbc_dist_id *D, T *W, cl_double *SW, uint32_t n)
282  {
283  for (uint32_t j = 0; j < n; ++j)
284  W[j] = 100.f / (100.f + D[j].dist);
285 
286  *SW = std::accumulate (W, W + n, 0.0);
287  }
288 
289 
299  template <typename T>
300  void cpuICPMean (T *F, T *M, T *mean, uint32_t n)
301  {
302  mean[0] = mean[1] = mean[2] = mean[3] = 0.0;
303  mean[4] = mean[5] = mean[6] = mean[7] = 0.0;
304 
305  for (uint32_t j = 0; j < n; ++j)
306  {
307  mean[0] += F[j * 8] / (T) n;
308  mean[1] += F[j * 8 + 1] / (T) n;
309  mean[2] += F[j * 8 + 2] / (T) n;
310 
311  mean[4] += M[j * 8] / (T) n;
312  mean[5] += M[j * 8 + 1] / (T) n;
313  mean[6] += M[j * 8 + 2] / (T) n;
314  }
315  }
316 
317 
328  template <typename T>
329  void cpuICPMeanWeighted (T *F, T *M, T *MEAN, T *W, uint32_t n)
330  {
331  MEAN[0] = MEAN[1] = MEAN[2] = MEAN[3] = 0.0;
332  MEAN[4] = MEAN[5] = MEAN[6] = MEAN[7] = 0.0;
333 
334  cl_double sum_w = std::accumulate (W, W + n, 0.0);
335 
336  for (uint32_t j = 0; j < n; ++j)
337  {
338  T w = W[j] / sum_w;
339 
340  MEAN[0] += w * F[j * 8];
341  MEAN[1] += w * F[j * 8 + 1];
342  MEAN[2] += w * F[j * 8 + 2];
343 
344  MEAN[4] += w * M[j * 8];
345  MEAN[5] += w * M[j * 8 + 1];
346  MEAN[6] += w * M[j * 8 + 2];
347  }
348  }
349 
350 
362  template <typename T>
363  void cpuICPDevs (T *F, T *M, T *DF, T *DM, T *mean, uint32_t n)
364  {
365  for (uint32_t j = 0; j < n; ++j)
366  {
367  for (uint32_t k = 0; k < 4; ++k)
368  {
369  DF[j * 4 + k] = F[j * 8 + k] - mean[k];
370  DM[j * 4 + k] = M[j * 8 + k] - mean[4 + k];
371  }
372  }
373  }
374 
375 
386  template <typename T>
387  void cpuICPS (T *DM, T *DF, T *S, uint32_t m, float c)
388  {
389  for (uint32_t j = 0; j < 11; ++j)
390  S[j] = 0.f;
391 
392  for (uint32_t i = 0; i < m; ++i)
393  {
394  T mp[3] = { c * DM[i * 4], c * DM[i * 4 + 1], c * DM[i * 4 + 2] };
395  T fp[3] = { c * DF[i * 4], c * DF[i * 4 + 1], c * DF[i * 4 + 2] };
396 
397  S[0] += mp[0] * fp[0];
398  S[1] += mp[0] * fp[1];
399  S[2] += mp[0] * fp[2];
400  S[3] += mp[1] * fp[0];
401  S[4] += mp[1] * fp[1];
402  S[5] += mp[1] * fp[2];
403  S[6] += mp[2] * fp[0];
404  S[7] += mp[2] * fp[1];
405  S[8] += mp[2] * fp[2];
406  S[9] += mp[0] * mp[0] + mp[1] * mp[1] + mp[2] * mp[2];
407  S[10] += fp[0] * fp[0] + fp[1] * fp[1] + fp[2] * fp[2];
408  }
409  }
410 
411 
423  template <typename T>
424  void cpuICPSw (T *M, T *F, T *W, T *S, uint32_t m, float c)
425  {
426  for (uint32_t j = 0; j < 11; ++j)
427  S[j] = 0.f;
428 
429  for (uint32_t i = 0; i < m; ++i)
430  {
431  T mp[3] = { c * M[i * 4], c * M[i * 4 + 1], c * M[i * 4 + 2] };
432  T fp[3] = { c * F[i * 4], c * F[i * 4 + 1], c * F[i * 4 + 2] };
433  T w = W[i];
434 
435  S[0] += w * mp[0] * fp[0];
436  S[1] += w * mp[0] * fp[1];
437  S[2] += w * mp[0] * fp[2];
438  S[3] += w * mp[1] * fp[0];
439  S[4] += w * mp[1] * fp[1];
440  S[5] += w * mp[1] * fp[2];
441  S[6] += w * mp[2] * fp[0];
442  S[7] += w * mp[2] * fp[1];
443  S[8] += w * mp[2] * fp[2];
444  S[9] += w * (mp[0] * mp[0] + mp[1] * mp[1] + mp[2] * mp[2]);
445  S[10] += w * (fp[0] * fp[0] + fp[1] * fp[1] + fp[2] * fp[2]);
446  }
447  }
448 
449 
458  template <typename T>
459  void cross_product (T *a, T *b, T *c)
460  {
461  c[0] = (a[1] * b[2]) - (a[2] * b[1]);
462  c[1] = (a[2] * b[0]) - (a[0] * b[2]);
463  c[2] = (a[0] * b[1]) - (a[1] * b[0]);
464  }
465 
466 
477  template <typename T>
478  void cpuICPTransformQ (T *M, T *tM, T *D, uint32_t m)
479  {
480  T q[4] = { D[0], D[1], D[2], D[3] };
481  T t[3] = { D[4], D[5], D[6] };
482  T s = D[7];
483 
484  for (uint32_t i = 0; i < m; ++i)
485  {
486  T p[3] = { M[i * 8], M[i * 8 + 1], M[i * 8 + 2] };
487 
488  T q2[3] = { 2 * q[0], 2 * q[1], 2 * q[2] };
489 
490  T qcp[3]; cross_product (q, p, qcp);
491  qcp[0] = qcp[0] + q[3] * p[0];
492  qcp[1] = qcp[1] + q[3] * p[1];
493  qcp[2] = qcp[2] + q[3] * p[2];
494 
495  T tp[3], q2cqcp[3]; cross_product (q2, qcp, q2cqcp);
496  tp[0] = s * (p[0] + q2cqcp[0]) + t[0];
497  tp[1] = s * (p[1] + q2cqcp[1]) + t[1];
498  tp[2] = s * (p[2] + q2cqcp[2]) + t[2];
499 
500  tM[i * 8] = tp[0];
501  tM[i * 8 + 1] = tp[1];
502  tM[i * 8 + 2] = tp[2];
503  tM[i * 8 + 3] = M[i * 8 + 3];
504  tM[i * 8 + 4] = M[i * 8 + 4];
505  tM[i * 8 + 5] = M[i * 8 + 5];
506  tM[i * 8 + 6] = M[i * 8 + 6];
507  tM[i * 8 + 7] = M[i * 8 + 7];
508  }
509  }
510 
511 
522  template <typename T>
523  void cpuICPTransformQ2 (T *M, T *tM, T *D, uint32_t m)
524  {
525  T q[4] = { D[0], D[1], D[2], D[3] };
526  T t[3] = { D[4], D[5], D[6] };
527  T s = D[7];
528 
529  T Q[4][4] = { { q[3], -q[2], q[1], q[0] },
530  { q[2], q[3], -q[0], q[1] },
531  { -q[1], q[0], q[3], q[2] },
532  { -q[0], -q[1], -q[2], q[3] } };
533 
534  T Q_[3][4] = { { q[3], -q[2], q[1], -q[0] },
535  { q[2], q[3], -q[0], -q[1] },
536  { -q[1], q[0], q[3], -q[2] } };
537 
538  for (uint32_t i = 0; i < m; ++i)
539  {
540  T p[4] = { M[i * 8], M[i * 8 + 1], M[i * 8 + 2], 0.f };
541 
542  T p_[4] = { std::inner_product (Q[0], Q[0] + 4, p, 0.f),
543  std::inner_product (Q[1], Q[1] + 4, p, 0.f),
544  std::inner_product (Q[2], Q[2] + 4, p, 0.f),
545  std::inner_product (Q[3], Q[3] + 4, p, 0.f) };
546 
547  T tp[4];
548  tp[0] = s * std::inner_product (Q_[0], Q_[0] + 4, p_, 0.f) + t[0];
549  tp[1] = s * std::inner_product (Q_[1], Q_[1] + 4, p_, 0.f) + t[1];
550  tp[2] = s * std::inner_product (Q_[2], Q_[2] + 4, p_, 0.f) + t[2];
551 
552  tM[i * 8] = tp[0];
553  tM[i * 8 + 1] = tp[1];
554  tM[i * 8 + 2] = tp[2];
555  tM[i * 8 + 3] = M[i * 8 + 3];
556  tM[i * 8 + 4] = M[i * 8 + 4];
557  tM[i * 8 + 5] = M[i * 8 + 5];
558  tM[i * 8 + 6] = M[i * 8 + 6];
559  tM[i * 8 + 7] = M[i * 8 + 7];
560  }
561  }
562 
563 
574  template <typename T>
575  void cpuICPTransformM (T *M, T *tM, T *D, uint32_t m)
576  {
577  for (uint32_t i = 0; i < m; ++i)
578  {
579  tM[i * 8] = std::inner_product (&D[0], &D[4], &M[i * 8], 0.f);
580  tM[i * 8 + 1] = std::inner_product (&D[4], &D[8], &M[i * 8], 0.f);
581  tM[i * 8 + 2] = std::inner_product (&D[8], &D[12], &M[i * 8], 0.f);
582  tM[i * 8 + 3] = M[i * 8 + 3];
583  tM[i * 8 + 4] = M[i * 8 + 4];
584  tM[i * 8 + 5] = M[i * 8 + 5];
585  tM[i * 8 + 6] = M[i * 8 + 6];
586  tM[i * 8 + 7] = M[i * 8 + 7];
587  }
588  }
589 
590 
597  template <typename T>
598  T cpuLength (T *x)
599  {
600  T sum = 0.f;
601 
602  sum += x[0] * x[0];
603  sum += x[1] * x[1];
604  sum += x[2] * x[2];
605  sum += x[3] * x[3];
606 
607  return std::sqrt (sum);
608  }
609 
610 
618  template <typename T>
619  T cpuDistance (T *x1, T *x2)
620  {
621  T sum = 0.f;
622 
623  sum += std::pow (x1[0] - x2[0], 2);
624  sum += std::pow (x1[1] - x2[1], 2);
625  sum += std::pow (x1[2] - x2[2], 2);
626  sum += std::pow (x1[3] - x2[3], 2);
627 
628  return std::sqrt (sum);
629  }
630 
631 
637  template <typename T>
638  void cpuNormalize (T *x)
639  {
640  T norm = cpuLength (x);
641 
642  x[0] /= norm;
643  x[1] /= norm;
644  x[2] /= norm;
645  x[3] /= norm;
646  }
647 
648 
656  template <typename T>
657  void cpuProd (T *N, T *x, T *x_new)
658  {
659 
660  x_new[0] = std::inner_product (N , N + 4, x, 0.f);
661  x_new[1] = std::inner_product (N + 4, N + 8, x, 0.f);
662  x_new[2] = std::inner_product (N + 8, N + 12, x, 0.f);
663  x_new[3] = std::inner_product (N + 12, N + 16, x, 0.f);
664  }
665 
666 
681  template <typename T>
682  T cpuICPPowerMethod (T *Sij, T *means, T *Tk)
683  {
684  T Sxx = Sij[0];
685  T Sxy = Sij[1];
686  T Sxz = Sij[2];
687  T Syx = Sij[3];
688  T Syy = Sij[4];
689  T Syz = Sij[5];
690  T Szx = Sij[6];
691  T Szy = Sij[7];
692  T Szz = Sij[8];
693 
694  T sk = sqrt (Sij[9] / Sij[10]);
695 
696  T N[16] =
697  {
698  Sxx - Syy - Szz, Sxy + Syx, Szx + Sxz, Syz - Szy,
699  Sxy + Syx, - Sxx + Syy - Szz, Syz + Szy, Szx - Sxz,
700  Szx + Sxz, Syz + Szy, - Sxx - Syy + Szz, Sxy - Syx,
701  Syz - Szy, Szx - Sxz, Sxy - Syx, Sxx + Syy + Szz
702  };
703 
704  // Power Method ============================================================
705 
706  T x[4] = { 1.f, 1.f, 1.f, 1.f };
707  T x_new[4];
708 
709  // Parameters
710  uint maxIter = 1000;
711  T error, error_new;
712 
713  while (true)
714  {
715  for (uint iter = 0; iter < maxIter; ++iter)
716  {
717  cpuProd (N, x, x_new);
718 
719  cpuNormalize (x_new);
720 
721  error = error_new;
722  if ((error_new = cpuDistance (x, x_new)) == error) break;
723 
724  std::copy (x_new, x_new + 4, x);
725  }
726 
727  T lambda = std::inner_product (N, N + 4, x_new, 0.f) / x_new[0];
728 
729  if (lambda < 0)
730  {
731  N[0] -= lambda;
732  N[5] -= lambda;
733  N[10] -= lambda;
734  N[15] -= lambda;
735 
736  x[0] = x[1] = x[2] = x[3] = 1.f;
737  }
738  else
739  break;
740  }
741 
742  std::copy (x_new, x_new + 4, x);
743  cpuProd (N, x, x_new);
744  cpuNormalize (x_new);
745 
746  // =========================================================================
747 
748  T qk[4] = { x_new[0], x_new[1], x_new[2], x_new[3] };
749 
750  T mf[3] = { means[0], means[1], means[2] };
751  T mm[3] = { means[4], means[5], means[6] };
752 
753  // tk = mf - sk * (mm + cross (2 * qk.xyz, cross (qk.xyz, mm) + qk.w * mm))
754  T qk_2[3] = { 2 * qk[0], 2 * qk[1], 2 * qk[2] };
755  T cp1[3]; cross_product (qk, mm, cp1);
756  T mmw[3] = { qk[3] * mm[0], qk[3] * mm[1], qk[3] * mm[2] };
757  T tmp1[3] = { cp1[0] + mmw[0], cp1[1] + mmw[1], cp1[2] + mmw[2] };
758  T cp2[3]; cross_product (qk_2, tmp1, cp2);
759  T tmp2[3] = { sk * (mm[0] + cp2[0]), sk * (mm[1] + cp2[1]), sk * (mm[2] + cp2[2]) };
760  T tk[4] = { mf[0] - tmp2[0], mf[1] - tmp2[1], mf[2] - tmp2[2], sk };
761 
762  std::copy (qk, qk + 4, Tk);
763  std::copy (tk, tk + 4, Tk + 4);
764  }
765 
766 }
767 
768 #endif // ICP_HELPERFUNCS_HPP
void cpuICPTransformM(T *M, T *tM, T *D, uint32_t m)
Performs a homogeneous transformation on a set of points using a transformation matrix.
Definition: helper_funcs.hpp:575
Offers functions that are serial CPU implementations of the relevant algorithms in the ICP pipeline...
Definition: helper_funcs.hpp:47
void cpuICPSw(T *M, T *F, T *W, T *S, uint32_t m, float c)
Calculates the S matrix and the constituents of the scale factor s.
Definition: helper_funcs.hpp:424
void cpuICPLMs(T *in, T *out)
Samples a point cloud for 16384 (128x128) landmarks.
Definition: helper_funcs.hpp:220
void cpuExScan(T *in, T *out, uint32_t width, uint32_t height)
Performs an exclusive scan operation on the columns of an array.
Definition: helper_funcs.hpp:200
void cpuICPMean(T *F, T *M, T *mean, uint32_t n)
Computes the mean on the xyz dimensions of the set of 8-D points.
Definition: helper_funcs.hpp:300
void printBufferF(const char *title, T *ptr, uint32_t width, uint32_t height, uint32_t prec)
Prints an array of floating-point type to standard output.
Definition: helper_funcs.hpp:107
void cpuICPWeights(rbc_dist_id *D, T *W, cl_double *SW, uint32_t n)
Computes weights for pairs of points in the fixed and moving sets, and also reduces them to get their...
Definition: helper_funcs.hpp:281
void cpuReduce(T *in, T *out, uint32_t cols, uint32_t rows, std::function< bool(T, T)> func)
Reduces each row of an array to a single element.
Definition: helper_funcs.hpp:137
void cpuProd(T *N, T *x, T *x_new)
Computes a matrix-vector product, .
Definition: helper_funcs.hpp:657
T cpuICPPowerMethod(T *Sij, T *means, T *Tk)
Computes the quantities that represent the incremental development in the transformation estimation i...
Definition: helper_funcs.hpp:682
void cpuICPTransformQ2(T *M, T *tM, T *D, uint32_t m)
Performs a homogeneous transformation on a set of points using a quaternion and a translation vector...
Definition: helper_funcs.hpp:523
void cpuInScan(T *in, T *out, uint32_t width, uint32_t height)
Performs an inclusive scan operation on the columns of an array.
Definition: helper_funcs.hpp:178
bool setProfilingFlag(int argc, char **argv)
Checks the command line arguments for the profiling flag, --profiling.
Definition: helper_funcs.cpp:66
T cpuDistance(T *x1, T *x2)
Computes the vector distance ( norm).
Definition: helper_funcs.hpp:619
void cpuICPMeanWeighted(T *F, T *M, T *MEAN, T *W, uint32_t n)
Computes the weighted mean on the xyz dimensions of the set of 8-D points.
Definition: helper_funcs.hpp:329
void cpuReduceSum(T *in, T *out, uint32_t cols, uint32_t rows)
Reduces each row of an array to a single element (sum).
Definition: helper_funcs.hpp:161
void cross_product(T *a, T *b, T *c)
Performs a cross product.
Definition: helper_funcs.hpp:459
void cpuICPS(T *DM, T *DF, T *S, uint32_t m, float c)
Calculates the S matrix and the constituents of the scale factor s.
Definition: helper_funcs.hpp:387
void cpuICPDevs(T *F, T *M, T *DF, T *DM, T *mean, uint32_t n)
Computes the deviations of a set of points from their mean.
Definition: helper_funcs.hpp:363
void printBuffer(const char *title, T *ptr, uint32_t width, uint32_t height)
Prints an array of an integer type to standard output.
Definition: helper_funcs.hpp:80
T cpuLength(T *x)
Computes the vector length ( norm).
Definition: helper_funcs.hpp:598
void cpuICPTransformQ(T *M, T *tM, T *D, uint32_t m)
Performs a homogeneous transformation on a set of points using a quaternion and a translation vector...
Definition: helper_funcs.hpp:478
void cpuNormalize(T *x)
Normalizes a vector.
Definition: helper_funcs.hpp:638
uint64_t nextPow2(T num)
Returns the first power of 2 greater than or equal to the input.
Definition: helper_funcs.hpp:60
void cpuICPReps(T *in, T *out, uint32_t nr)
Samples a set of 16384 (128x128) landmarks for representatives.
Definition: helper_funcs.hpp:246