ICP  1.1.0
 Hosted by GitHub
algorithms.hpp
Go to the documentation of this file.
1 
32 #ifndef ICP_ALGORITHMS_HPP
33 #define ICP_ALGORITHMS_HPP
34 
35 #include <CLUtils.hpp>
36 #include <ICP/common.hpp>
37 #include <RBC/data_types.hpp>
38 #include <RBC/algorithms.hpp>
39 #include <eigen3/Eigen/Dense>
40 
41 
45 namespace cl_algo
46 {
48 namespace ICP
49 {
50 
// Compile-time selector used as the first template argument of `Reduce`.
// The enumerator names suggest the reduction operator (minimum, maximum, sum);
// NOTE(review): the kernels that implement them are not visible here - confirm.
52  enum class ReduceConfig : uint8_t
53  {
54  MIN,
55  MAX,
56  SUM
57  };
58 
59 
// Host-side wrapper around two OpenCL kernels (`recKernel`, `groupRecKernel`),
// a command queue and a set of host/device buffers.
//   C - ReduceConfig variant this instantiation is built for.
//   T - element type of the public host pointers (defaults to cl_float).
// Only the timed `run` overload is defined in this listing; every other member
// function is declared here and defined elsewhere.
83  template <ReduceConfig C, typename T = cl_float>
84  class Reduce
85  {
86  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
91  enum class Memory : uint8_t
92  {
93  H_IN,
94  H_OUT,
95  D_IN,
96  D_RED,
97  D_OUT
98  };
99 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
101  Reduce (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
103  cl::Memory& get (Reduce::Memory mem);
    // Configures the object for a `_cols` x `_rows` problem; `_staging` defaults to Staging::IO.
105  void init (unsigned int _cols, unsigned int _rows, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN); non-blocking by default.
107  void write (Reduce::Memory mem = Reduce::Memory::D_IN, void *ptr = nullptr, bool block = CL_FALSE,
108  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
110  void* read (Reduce::Memory mem = Reduce::Memory::H_OUT, bool block = CL_TRUE,
111  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
113  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
114 
    // Public host pointers, typed with the template parameter T.
115  T *hPtrIn;
116  T *hPtrOut;
118  private:
119  clutils::CLEnv &env;
120  clutils::CLEnvInfo<1> info;
121  cl::Context context;
122  cl::CommandQueue queue;
123  cl::Kernel recKernel, groupRecKernel;
124  cl::NDRange globalR, globalGR, local;
125  Staging staging;
    // wgXdim decides in the timed run() whether the second kernel is needed.
126  size_t wgMultiple, wgXdim;
127  unsigned int cols, rows;
128  unsigned int bufferInSize, bufferGRSize, bufferOutSize;
129  cl::Buffer hBufferIn, hBufferOut;
130  cl::Buffer dBufferIn, dBufferR, dBufferOut;
131 
132  public:
    // Profiling variant of run(): launches the kernel(s) one at a time and
    // returns the sum of the `timer.duration ()` values.
    //   timer  - GPU timer; its event is attached to every launch and waited on
    //            before the next launch is enqueued.
    //   events - optional wait list; only the first launch receives it.
    // When wgXdim == 1 only `recKernel` runs; otherwise `groupRecKernel` follows
    // over `globalGR` with the same local size.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
140  template <typename period>
141  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
142  {
143  double pTime;
144 
145  if (wgXdim == 1)
146  {
147  queue.enqueueNDRangeKernel (recKernel, cl::NullRange, globalR, local, events, &timer.event ());
148  queue.flush (); timer.wait ();
149  pTime = timer.duration ();
150  }
151  else
152  {
    // First pass: identical launch to the single-kernel case.
153  queue.enqueueNDRangeKernel (recKernel, cl::NullRange, globalR, local, events, &timer.event ());
154  queue.flush (); timer.wait ();
155  pTime = timer.duration ();
156 
    // Second pass: no wait list, the previous launch has already been waited on.
157  queue.enqueueNDRangeKernel (groupRecKernel, cl::NullRange, globalGR, local, nullptr, &timer.event ());
158  queue.flush (); timer.wait ();
159  pTime += timer.duration ();
160  }
161 
162  return pTime;
163  }
164 
165  };
166 
167 
// Compile-time selector used as the first template argument of `Scan`.
// NOTE(review): presumably chooses between an inclusive and an exclusive
// prefix scan - the kernels are not visible here, confirm.
169  enum class ScanConfig : uint8_t
170  {
171  INCLUSIVE,
172  EXCLUSIVE
173  };
174 
175 
// Host-side wrapper around three OpenCL kernels (`kernelScan`, `kernelSumsScan`,
// `kernelAddSums`), a command queue and a set of host/device buffers.
//   C - ScanConfig variant this instantiation is built for.
//   T - element type of the public host pointers (defaults to cl_int).
// Only the timed `run` overload is defined in this listing; every other member
// function is declared here and defined elsewhere.
199  template <ScanConfig C, typename T = cl_int>
200  class Scan
201  {
202  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
207  enum class Memory : uint8_t
208  {
209  H_IN,
210  H_OUT,
211  D_IN,
212  D_SUMS,
213  D_OUT
214  };
215 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
217  Scan (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
219  cl::Memory& get (Scan::Memory mem);
    // Configures the object for a `_cols` x `_rows` problem; `_staging` defaults to Staging::IO.
221  void init (unsigned int _cols, unsigned int _rows, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN); non-blocking by default.
223  void write (Scan::Memory mem = Scan::Memory::D_IN, void *ptr = nullptr, bool block = CL_FALSE,
224  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
226  void* read (Scan::Memory mem = Scan::Memory::H_OUT, bool block = CL_TRUE,
227  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
229  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
230 
    // Public host pointers, typed with the template parameter T.
231  T *hPtrIn;
232  T *hPtrOut;
234  private:
235  clutils::CLEnv &env;
236  clutils::CLEnvInfo<1> info;
237  cl::Context context;
238  cl::CommandQueue queue;
239  cl::Kernel kernelScan, kernelSumsScan, kernelAddSums;
240  cl::NDRange globalScan, globalSumsScan, localScan;
    // `kernelAddSums` is the only kernel launched with a non-null global offset.
241  cl::NDRange globalAddSums, localAddSums, offsetAddSums;
242  Staging staging;
    // wgXdim decides in the timed run() whether the two follow-up kernels are needed.
243  size_t wgMultiple, wgXdim;
244  unsigned int cols, rows, bufferSize, bufferSumsSize;
245  cl::Buffer hBufferIn, hBufferOut;
246  cl::Buffer dBufferIn, dBufferOut, dBufferSums;
247 
248  public:
    // Profiling variant of run(): launches the kernel(s) one at a time and
    // returns the sum of the `timer.duration ()` values.
    //   timer  - GPU timer; its event is attached to every launch and waited on
    //            before the next launch is enqueued.
    //   events - optional wait list; only the first launch receives it.
    // When wgXdim == 1 only `kernelScan` runs; otherwise `kernelSumsScan` and
    // `kernelAddSums` follow, in that order.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
256  template <typename period>
257  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
258  {
259  double pTime;
260 
261  if (wgXdim == 1)
262  {
263  queue.enqueueNDRangeKernel (
264  kernelScan, cl::NullRange, globalScan, localScan, events, &timer.event ());
265  queue.flush (); timer.wait ();
266  pTime = timer.duration ();
267  }
268  else
269  {
    // Stage 1: identical launch to the single-kernel case.
270  queue.enqueueNDRangeKernel (
271  kernelScan, cl::NullRange, globalScan, localScan, events, &timer.event ());
272  queue.flush (); timer.wait ();
273  pTime = timer.duration ();
274 
    // Stage 2: reuses `localScan` as its local size.
275  queue.enqueueNDRangeKernel (
276  kernelSumsScan, cl::NullRange, globalSumsScan, localScan, nullptr, &timer.event ());
277  queue.flush (); timer.wait ();
278  pTime += timer.duration ();
279 
    // Stage 3: launched with the global offset `offsetAddSums` and its own local size.
280  queue.enqueueNDRangeKernel (
281  kernelAddSums, offsetAddSums, globalAddSums, localAddSums, nullptr, &timer.event ());
282  queue.flush (); timer.wait ();
283  pTime += timer.duration ();
284  }
285 
286  return pTime;
287  }
288 
289  };
290 
291 
// Host-side wrapper around a single OpenCL kernel with one input and one
// output buffer pair (host + device). Works on cl_float data.
// Only the timed `run` overload is defined in this listing; every other member
// function is declared here and defined elsewhere.
312  class ICPLMs
313  {
314  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
319  enum class Memory : uint8_t
320  {
321  H_IN,
322  H_OUT,
323  D_IN,
324  D_OUT
325  };
326 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
328  ICPLMs (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
330  cl::Memory& get (ICPLMs::Memory mem);
    // Takes no problem-size arguments, only the staging mode (default Staging::IO).
332  void init (Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN); non-blocking by default.
334  void write (ICPLMs::Memory mem = ICPLMs::Memory::D_IN, void *ptr = nullptr, bool block = CL_FALSE,
335  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
337  void* read (ICPLMs::Memory mem = ICPLMs::Memory::H_OUT, bool block = CL_TRUE,
338  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
340  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
341 
    // Public host pointers for the input and output data.
342  cl_float *hPtrIn;
343  cl_float *hPtrOut;
345  private:
346  clutils::CLEnv &env;
347  clutils::CLEnvInfo<1> info;
348  cl::Context context;
349  cl::CommandQueue queue;
350  cl::Kernel kernel;
351  cl::NDRange global;
352  Staging staging;
353  unsigned int n, m, d;
354  unsigned int bufferInSize, bufferOutSize;
355  cl::Buffer hBufferIn, hBufferOut, dBufferIn, dBufferOut;
356 
357  public:
    // Profiling variant of run(): launches `kernel` over `global` without an
    // explicit local size (cl::NullRange), waits on the timer and returns
    // `timer.duration ()`.
    //   timer  - GPU timer whose event is attached to the launch.
    //   events - optional wait list for the launch.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
365  template <typename period>
366  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
367  {
368  queue.enqueueNDRangeKernel (kernel, cl::NullRange, global, cl::NullRange, events, &timer.event ());
369  queue.flush (); timer.wait ();
370 
371  return timer.duration ();
372  }
373 
374  };
375 
376 
// Host-side wrapper around a single OpenCL kernel with one input and one
// output buffer pair (host + device). Works on cl_float data.
// Only the timed `run` overload is defined in this listing; every other member
// function is declared here and defined elsewhere.
397  class ICPReps
398  {
399  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
404  enum class Memory : uint8_t
405  {
406  H_IN,
407  H_OUT,
408  D_IN,
409  D_OUT
410  };
411 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
413  ICPReps (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
415  cl::Memory& get (ICPReps::Memory mem);
    // `_nr` is the only size argument; `_staging` defaults to Staging::IO.
    // NOTE(review): `_nr` presumably feeds the `nr` member (and nrx/nry) - confirm.
417  void init (unsigned int _nr, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN); non-blocking by default.
419  void write (ICPReps::Memory mem = ICPReps::Memory::D_IN, void *ptr = nullptr, bool block = CL_FALSE,
420  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
422  void* read (ICPReps::Memory mem = ICPReps::Memory::H_OUT, bool block = CL_TRUE,
423  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
425  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
426 
    // Public host pointers for the input and output data.
427  cl_float *hPtrIn;
428  cl_float *hPtrOut;
430  private:
431  clutils::CLEnv &env;
432  clutils::CLEnvInfo<1> info;
433  cl::Context context;
434  cl::CommandQueue queue;
435  cl::Kernel kernel;
436  cl::NDRange global;
437  Staging staging;
438  unsigned int m, nr, nrx, nry, d;
439  unsigned int bufferInSize, bufferOutSize;
440  cl::Buffer hBufferIn, hBufferOut, dBufferIn, dBufferOut;
441 
442  public:
    // Profiling variant of run(): launches `kernel` over `global` without an
    // explicit local size (cl::NullRange), waits on the timer and returns
    // `timer.duration ()`.
    //   timer  - GPU timer whose event is attached to the launch.
    //   events - optional wait list for the launch.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
450  template <typename period>
451  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
452  {
453  queue.enqueueNDRangeKernel (kernel, cl::NullRange, global, cl::NullRange, events, &timer.event ());
454  queue.flush (); timer.wait ();
455 
456  return timer.duration ();
457  }
458 
459  };
460 
461 
// Body of the class whose constructor is `ICPWeights`.
// NOTE(review): the class-head line (original line 485) is missing from this
// listing; it presumably reads `class ICPWeights` - restore it from the real header.
// The class wraps two OpenCL kernels (`weightKernel`, `groupWeightKernel`);
// its input host pointer is `rbc_dist_id *`, its outputs are `cl_float *` and
// `cl_double *`. Only the timed `run` overload is defined in this listing.
486  {
487  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
492  enum class Memory : uint8_t
493  {
494  H_IN,
496  H_OUT_W,
497  H_OUT_SUM_W,
498  D_IN,
500  D_OUT_W,
501  D_GW,
502  D_OUT_SUM_W,
503  };
504 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
506  ICPWeights (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
508  cl::Memory& get (ICPWeights::Memory mem);
    // `_n` is the only size argument; `_staging` defaults to Staging::IO.
510  void init (unsigned int _n, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN); non-blocking by default.
512  void write (ICPWeights::Memory mem = ICPWeights::Memory::D_IN, void *ptr = nullptr, bool block = CL_FALSE,
513  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT_SUM_W); blocking by default. Returns an untyped pointer.
515  void* read (ICPWeights::Memory mem = ICPWeights::Memory::H_OUT_SUM_W, bool block = CL_TRUE,
516  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
518  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
519 
    // Public host pointers; note the three different element types.
520  rbc_dist_id *hPtrIn;
521  cl_float *hPtrOutW;
522  cl_double *hPtrOutSW;
524  private:
525  clutils::CLEnv &env;
526  clutils::CLEnvInfo<1> info;
527  cl::Context context;
528  cl::CommandQueue queue;
529  cl::Kernel weightKernel, groupWeightKernel;
530  cl::NDRange globalW, globalGW, local;
531  Staging staging;
    // wgXdim decides in the timed run() whether the second kernel is needed.
532  size_t wgMultiple, wgXdim;
533  unsigned int n;
534  unsigned int bufferInSize, bufferOutWSize, bufferGWSize, bufferOutSWSize;
535  cl::Buffer hBufferIn, hBufferOutW, hBufferOutSW;
536  cl::Buffer dBufferIn, dBufferOutW, dBufferOutSW;
537  cl::Buffer dBufferGW;
538 
539  public:
    // Profiling variant of run(): launches the kernel(s) one at a time and
    // returns the sum of the `timer.duration ()` values.
    //   timer  - GPU timer; its event is attached to every launch and waited on
    //            before the next launch is enqueued.
    //   events - optional wait list; only the first launch receives it.
    // When wgXdim == 1 only `weightKernel` runs; otherwise `groupWeightKernel`
    // follows over `globalGW` with the same local size.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
547  template <typename period>
548  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
549  {
550  double pTime;
551 
552  if (wgXdim == 1)
553  {
554  queue.enqueueNDRangeKernel (weightKernel, cl::NullRange, globalW, local, events, &timer.event ());
555  queue.flush (); timer.wait ();
556  pTime = timer.duration ();
557  }
558  else
559  {
    // First pass: identical launch to the single-kernel case.
560  queue.enqueueNDRangeKernel (weightKernel, cl::NullRange, globalW, local, events, &timer.event ());
561  queue.flush (); timer.wait ();
562  pTime = timer.duration ();
563 
    // Second pass: no wait list, the previous launch has already been waited on.
564  queue.enqueueNDRangeKernel (groupWeightKernel, cl::NullRange, globalGW, local, nullptr, &timer.event ());
565  queue.flush (); timer.wait ();
566  pTime += timer.duration ();
567  }
568 
569  return pTime;
570  }
571 
572  };
573 
574 
// Compile-time selector used as the template argument of `ICPMean`.
576  enum class ICPMeanConfig : uint8_t
577  {
578  REGULAR,
580  WEIGHTED
582  };
583 
584 
// Primary template: declared only, so an `ICPMean` instantiation is usable
// only through an explicit specialization.
595  template <ICPMeanConfig C>
596  class ICPMean;
597 
598 
// Explicit specialization whose constructor is `ICPMean`.
// NOTE(review): the class-head line (original line 625) is missing from this
// listing. As this variant has no weight inputs it is presumably
// `class ICPMean<ICPMeanConfig::REGULAR>` - restore it from the real header.
// The class wraps two OpenCL kernels (`meanKernel`, `groupMeanKernel`) and works
// on cl_float data. Only the timed `run` overload is defined in this listing.
624  template <>
626  {
627  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
632  enum class Memory : uint8_t
633  {
634  H_IN_F,
635  H_IN_M,
636  H_OUT,
638  D_IN_F,
639  D_IN_M,
640  D_GM,
641  D_OUT
643  };
644 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
646  ICPMean (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
648  cl::Memory& get (ICPMean::Memory mem);
    // `_n` is the only size argument; `_staging` defaults to Staging::IO.
650  void init (unsigned int _n, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN_F); non-blocking by default.
652  void write (ICPMean::Memory mem = ICPMean::Memory::D_IN_F, void *ptr = nullptr, bool block = CL_FALSE,
653  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
655  void* read (ICPMean::Memory mem = ICPMean::Memory::H_OUT, bool block = CL_TRUE,
656  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
658  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
659 
    // Public host pointers: two inputs and one output.
660  cl_float *hPtrInF;
661  cl_float *hPtrInM;
662  cl_float *hPtrOut;
664  private:
665  clutils::CLEnv &env;
666  clutils::CLEnvInfo<1> info;
667  cl::Context context;
668  cl::CommandQueue queue;
669  cl::Kernel meanKernel, groupMeanKernel;
670  cl::NDRange globalM, globalGM, local;
671  Staging staging;
    // wgXdim decides in the timed run() whether the second kernel is needed.
672  size_t wgMultiple, wgXdim;
673  unsigned int n, d;
674  unsigned int bufferInSize, bufferGMSize, bufferOutSize;
675  cl::Buffer hBufferInF, hBufferInM, hBufferOut;
676  cl::Buffer dBufferInF, dBufferInM, dBufferGM, dBufferOut;
677 
678  public:
    // Profiling variant of run(): launches the kernel(s) one at a time and
    // returns the sum of the `timer.duration ()` values.
    //   timer  - GPU timer; its event is attached to every launch and waited on
    //            before the next launch is enqueued.
    //   events - optional wait list; only the first launch receives it.
    // When wgXdim == 1 only `meanKernel` runs; otherwise `groupMeanKernel`
    // follows over `globalGM` with the same local size.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
686  template <typename period>
687  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
688  {
689  double pTime;
690 
691  if (wgXdim == 1)
692  {
693  queue.enqueueNDRangeKernel (meanKernel, cl::NullRange, globalM, local, events, &timer.event ());
694  queue.flush (); timer.wait ();
695  pTime = timer.duration ();
696  }
697  else
698  {
    // First pass: identical launch to the single-kernel case.
699  queue.enqueueNDRangeKernel (meanKernel, cl::NullRange, globalM, local, events, &timer.event ());
700  queue.flush (); timer.wait ();
701  pTime = timer.duration ();
702 
    // Second pass: no wait list, the previous launch has already been waited on.
703  queue.enqueueNDRangeKernel (groupMeanKernel, cl::NullRange, globalGM, local, nullptr, &timer.event ());
704  queue.flush (); timer.wait ();
705  pTime += timer.duration ();
706  }
707 
708  return pTime;
709  }
710 
711  };
712 
713 
// Explicit specialization whose constructor is `ICPMean`.
// NOTE(review): the class-head line (original line 744) is missing from this
// listing. As this variant carries weight inputs (H_IN_W, H_IN_SUM_W) it is
// presumably `class ICPMean<ICPMeanConfig::WEIGHTED>` - restore it from the real header.
// The class wraps two OpenCL kernels (`meanKernel`, `groupMeanKernel`).
// Only the timed `run` overload is defined in this listing.
743  template <>
745  {
746  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
751  enum class Memory : uint8_t
752  {
753  H_IN_F,
754  H_IN_M,
755  H_IN_W,
756  H_IN_SUM_W,
757  H_OUT,
759  D_IN_F,
760  D_IN_M,
761  D_IN_W,
762  D_IN_SUM_W,
763  D_GM,
764  D_OUT
766  };
767 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
769  ICPMean (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
771  cl::Memory& get (ICPMean::Memory mem);
    // `_n` is the only size argument; `_staging` defaults to Staging::IO.
773  void init (unsigned int _n, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN_F); non-blocking by default.
775  void write (ICPMean::Memory mem = ICPMean::Memory::D_IN_F, void *ptr = nullptr, bool block = CL_FALSE,
776  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
778  void* read (ICPMean::Memory mem = ICPMean::Memory::H_OUT, bool block = CL_TRUE,
779  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
781  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
782 
    // Public host pointers: four inputs (hPtrInSW is cl_double, the rest cl_float)
    // and one output.
783  cl_float *hPtrInF;
784  cl_float *hPtrInM;
785  cl_float *hPtrInW;
786  cl_double *hPtrInSW;
787  cl_float *hPtrOut;
789  private:
790  clutils::CLEnv &env;
791  clutils::CLEnvInfo<1> info;
792  cl::Context context;
793  cl::CommandQueue queue;
794  cl::Kernel meanKernel, groupMeanKernel;
795  cl::NDRange globalM, globalGM, local;
796  Staging staging;
    // wgXdim decides in the timed run() whether the second kernel is needed.
797  size_t wgMultiple, wgXdim;
798  unsigned int n, d;
799  unsigned int bufferInFMSize, bufferInWSize, bufferInSWSize, bufferGMSize, bufferOutSize;
800  cl::Buffer hBufferInF, hBufferInM, hBufferInW, hBufferInSW, hBufferOut;
801  cl::Buffer dBufferInF, dBufferInM, dBufferInW, dBufferInSW, dBufferOut;
802  cl::Buffer dBufferGM;
803 
804  public:
    // Profiling variant of run(): launches the kernel(s) one at a time and
    // returns the sum of the `timer.duration ()` values.
    //   timer  - GPU timer; its event is attached to every launch and waited on
    //            before the next launch is enqueued.
    //   events - optional wait list; only the first launch receives it.
    // When wgXdim == 1 only `meanKernel` runs; otherwise `groupMeanKernel`
    // follows over `globalGM` with the same local size.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
812  template <typename period>
813  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
814  {
815  double pTime;
816 
817  if (wgXdim == 1)
818  {
819  queue.enqueueNDRangeKernel (meanKernel, cl::NullRange, globalM, local, events, &timer.event ());
820  queue.flush (); timer.wait ();
821  pTime = timer.duration ();
822  }
823  else
824  {
    // First pass: identical launch to the single-kernel case.
825  queue.enqueueNDRangeKernel (meanKernel, cl::NullRange, globalM, local, events, &timer.event ());
826  queue.flush (); timer.wait ();
827  pTime = timer.duration ();
828 
    // Second pass: no wait list, the previous launch has already been waited on.
829  queue.enqueueNDRangeKernel (groupMeanKernel, cl::NullRange, globalGM, local, nullptr, &timer.event ());
830  queue.flush (); timer.wait ();
831  pTime += timer.duration ();
832  }
833 
834  return pTime;
835  }
836 
837  };
838 
839 
// Host-side wrapper around a single OpenCL kernel with three inputs
// (F, M, MEAN) and two outputs (DEV_F, DEV_M), all cl_float on the host side.
// Only the timed `run` overload is defined in this listing; every other member
// function is declared here and defined elsewhere.
867  class ICPDevs
868  {
869  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
874  enum class Memory : uint8_t
875  {
876  H_IN_F,
877  H_IN_M,
878  H_IN_MEAN,
879  H_OUT_DEV_F,
880  H_OUT_DEV_M,
881  D_IN_F,
882  D_IN_M,
883  D_IN_MEAN,
884  D_OUT_DEV_F,
885  D_OUT_DEV_M
886  };
887 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
889  ICPDevs (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
891  cl::Memory& get (ICPDevs::Memory mem);
    // `_n` is the only size argument; `_staging` defaults to Staging::IO.
893  void init (unsigned int _n, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN_F); non-blocking by default.
895  void write (ICPDevs::Memory mem = ICPDevs::Memory::D_IN_F, void *ptr = nullptr, bool block = CL_FALSE,
896  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT_DEV_F); blocking by default. Returns an untyped pointer.
898  void* read (ICPDevs::Memory mem = ICPDevs::Memory::H_OUT_DEV_F, bool block = CL_TRUE,
899  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
901  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
902 
    // Public host pointers: three inputs followed by two outputs.
903  cl_float *hPtrInF;
904  cl_float *hPtrInM;
905  cl_float *hPtrInMean;
906  cl_float *hPtrOutDevF;
907  cl_float *hPtrOutDevM;
909  private:
910  clutils::CLEnv &env;
911  clutils::CLEnvInfo<1> info;
912  cl::Context context;
913  cl::CommandQueue queue;
914  cl::Kernel kernel;
915  cl::NDRange global;
916  Staging staging;
917  unsigned int n, d;
918  unsigned int bufferInFMSize, bufferInMeanSize, bufferOutSize;
919  cl::Buffer hBufferInF, hBufferInM, hBufferInMean, hBufferOutDF, hBufferOutDM;
920  cl::Buffer dBufferInF, dBufferInM, dBufferInMean, dBufferOutDF, dBufferOutDM;
921 
922  public:
    // Profiling variant of run(): launches `kernel` over `global` without an
    // explicit local size (cl::NullRange), waits on the timer and returns
    // `timer.duration ()`.
    //   timer  - GPU timer whose event is attached to the launch.
    //   events - optional wait list for the launch.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
930  template <typename period>
931  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
932  {
933  queue.enqueueNDRangeKernel (kernel, cl::NullRange, global, cl::NullRange, events, &timer.event ());
934  queue.flush (); timer.wait ();
935 
936  return timer.duration ();
937  }
938 
939  };
940 
941 
// Compile-time selector used as the template argument of `ICPS`.
943  enum class ICPSConfig : uint8_t
944  {
945  REGULAR,
946  WEIGHTED
947  };
948 
949 
// Primary template: declared only, so an `ICPS` instantiation is usable only
// through an explicit specialization.
961  template <ICPSConfig C>
962  class ICPS;
963 
964 
// Explicit specialization whose constructor is `ICPS`.
// NOTE(review): the class-head line (original line 991) is missing from this
// listing. As this variant has no weight input it is presumably
// `class ICPS<ICPSConfig::REGULAR>` - restore it from the real header.
// The class wraps one OpenCL kernel plus a helper object `reduceSij` whose timed
// run() is chained after the kernel. Only the timed `run` overload is defined here.
990  template <>
992  {
993  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
998  enum class Memory : uint8_t
999  {
1000  H_IN_DEV_M,
1001  H_IN_DEV_F,
1002  H_OUT,
1004  D_IN_DEV_M,
1005  D_IN_DEV_F,
1006  D_SIJ,
1007  D_OUT
1009  };
1010 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
1012  ICPS (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
1014  cl::Memory& get (ICPS::Memory mem);
    // `_m` is the size argument and `_c` a float parameter with no default;
    // `_staging` defaults to Staging::IO.
    // NOTE(review): `_c` presumably initialises the scaling factor exposed by
    // getScaling()/setScaling() - confirm.
1016  void init (unsigned int _m, float _c, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN_DEV_M); non-blocking by default.
1018  void write (ICPS::Memory mem = ICPS::Memory::D_IN_DEV_M, void *ptr = nullptr, bool block = CL_FALSE,
1019  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
1021  void* read (ICPS::Memory mem = ICPS::Memory::H_OUT, bool block = CL_TRUE,
1022  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
1024  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Accessor pair for a float scaling value.
1026  float getScaling ();
1028  void setScaling (float _c);
1029 
    // Public host pointers: two inputs and one output.
1030  cl_float *hPtrInDevM;
1031  cl_float *hPtrInDevF;
1032  cl_float *hPtrOut;
1034  private:
1035  clutils::CLEnv &env;
1036  clutils::CLEnvInfo<1> info;
1037  cl::Context context;
1038  cl::CommandQueue queue;
1039  cl::Kernel kernel;
1040  cl::NDRange global;
    // NOTE(review): original line 1041 is absent from this listing; `reduceSij`,
    // used by the timed run() below, is presumably declared there - confirm.
1042  Staging staging;
1043  float c;
1044  unsigned int m, d;
1045  unsigned int bufferInSize, bufferSijSize, bufferOutSize;
1046  cl::Buffer hBufferInDM, hBufferInDF, hBufferSij, hBufferOut;
1047  cl::Buffer dBufferInDM, dBufferInDF, dBufferSij, dBufferOut;
1048 
1049  public:
    // Profiling variant of run(): launches `kernel` over `global` without an
    // explicit local size, waits on the timer, then calls `reduceSij.run (timer)`
    // and returns the sum of both timings.
    //   timer  - GPU timer shared by the kernel launch and `reduceSij`.
    //   events - optional wait list; only the kernel launch receives it.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
1057  template <typename period>
1058  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
1059  {
1060  double pTime;
1061 
1062  queue.enqueueNDRangeKernel (kernel, cl::NullRange, global, cl::NullRange, events, &timer.event ());
1063  queue.flush (); timer.wait ();
1064  pTime = timer.duration ();
1065 
    // Second stage is delegated; it is called without a wait list.
1066  pTime += reduceSij.run (timer);
1067 
1068  return pTime;
1069  }
1070 
1071  };
1072 
1073 
// Explicit specialization whose constructor is `ICPS`.
// NOTE(review): the class-head line (original line 1102) is missing from this
// listing. As this variant carries a weight input (H_IN_W) it is presumably
// `class ICPS<ICPSConfig::WEIGHTED>` - restore it from the real header.
// The class wraps one OpenCL kernel plus a helper object `reduceSij` whose timed
// run() is chained after the kernel. Only the timed `run` overload is defined here.
1101  template <>
1103  {
1104  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
1109  enum class Memory : uint8_t
1110  {
1111  H_IN_DEV_M,
1112  H_IN_DEV_F,
1113  H_IN_W,
1114  H_OUT,
1116  D_IN_DEV_M,
1117  D_IN_DEV_F,
1118  D_IN_W,
1119  D_SIJ,
1120  D_OUT
1122  };
1123 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
1125  ICPS (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
1127  cl::Memory& get (ICPS::Memory mem);
    // `_m` is the size argument and `_c` a float parameter with no default;
    // `_staging` defaults to Staging::IO.
    // NOTE(review): `_c` presumably initialises the scaling factor exposed by
    // getScaling()/setScaling() - confirm.
1129  void init (unsigned int _m, float _c, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN_DEV_M); non-blocking by default.
1131  void write (ICPS::Memory mem = ICPS::Memory::D_IN_DEV_M, void *ptr = nullptr, bool block = CL_FALSE,
1132  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
1134  void* read (ICPS::Memory mem = ICPS::Memory::H_OUT, bool block = CL_TRUE,
1135  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
1137  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Accessor pair for a float scaling value.
1139  float getScaling ();
1141  void setScaling (float _c);
1142 
    // Public host pointers: three inputs and one output.
1143  cl_float *hPtrInDevM;
1144  cl_float *hPtrInDevF;
1145  cl_float *hPtrInW;
1146  cl_float *hPtrOut;
1148  private:
1149  clutils::CLEnv &env;
1150  clutils::CLEnvInfo<1> info;
1151  cl::Context context;
1152  cl::CommandQueue queue;
1153  cl::Kernel kernel;
1154  cl::NDRange global;
    // NOTE(review): original line 1155 is absent from this listing; `reduceSij`,
    // used by the timed run() below, is presumably declared there - confirm.
1156  Staging staging;
1157  float c;
1158  unsigned int m, d;
1159  unsigned int bufferInFMSize, bufferInWSize, bufferSijSize, bufferOutSize;
1160  cl::Buffer hBufferInDM, hBufferInDF, hBufferInW, hBufferSij, hBufferOut;
1161  cl::Buffer dBufferInDM, dBufferInDF, dBufferInW, dBufferSij, dBufferOut;
1162 
1163  public:
    // Profiling variant of run(): launches `kernel` over `global` without an
    // explicit local size, waits on the timer, then calls `reduceSij.run (timer)`
    // and returns the sum of both timings.
    //   timer  - GPU timer shared by the kernel launch and `reduceSij`.
    //   events - optional wait list; only the kernel launch receives it.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
1171  template <typename period>
1172  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
1173  {
1174  double pTime;
1175 
1176  queue.enqueueNDRangeKernel (kernel, cl::NullRange, global, cl::NullRange, events, &timer.event ());
1177  queue.flush (); timer.wait ();
1178  pTime = timer.duration ();
1179 
    // Second stage is delegated; it is called without a wait list.
1180  pTime += reduceSij.run (timer);
1181 
1182  return pTime;
1183  }
1184 
1185  };
1186 
1187 
// Compile-time selector used as the template argument of `ICPTransform`.
// NOTE(review): the names suggest the representation in which the rotation is
// supplied (quaternion vs. matrix) - confirm against the kernel sources.
1189  enum class ICPTransformConfig : uint8_t
1190  {
1191  QUATERNION,
1194  MATRIX
1198  };
1199 
1200 
// Template header of the `ICPTransform` primary template.
// NOTE(review): the declaration line that should follow (original line 1211,
// presumably `class ICPTransform;`) is missing from this listing - restore it
// from the real header, otherwise this header attaches to the next declaration.
1210  template <ICPTransformConfig C>
1212 
1213 
// Explicit specialization whose constructor is `ICPTransform`.
// NOTE(review): the class-head line (original line 1240) is missing from this
// listing. Being the first of two specializations it is presumably
// `class ICPTransform<ICPTransformConfig::QUATERNION>` - confirm and restore.
// The class wraps a single OpenCL kernel with two inputs (M, T) and one output,
// all cl_float on the host side. Only the timed `run` overload is defined here.
1239  template <>
1241  {
1242  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
1247  enum class Memory : uint8_t
1248  {
1249  H_IN_M,
1250  H_IN_T,
1258  H_OUT,
1259  D_IN_M,
1260  D_IN_T,
1268  D_OUT
1269  };
1270 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
1272  ICPTransform (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
1274  cl::Memory& get (ICPTransform::Memory mem);
    // `_m` is the only size argument; `_staging` defaults to Staging::IO.
1276  void init (unsigned int _m, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN_M); non-blocking by default.
1278  void write (ICPTransform::Memory mem = ICPTransform::Memory::D_IN_M, void *ptr = nullptr, bool block = CL_FALSE,
1279  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
1281  void* read (ICPTransform::Memory mem = ICPTransform::Memory::H_OUT, bool block = CL_TRUE,
1282  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
1284  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
1285 
    // Public host pointers: two inputs and one output.
1286  cl_float *hPtrInM;
1287  cl_float *hPtrInT;
1288  cl_float *hPtrOut;
1290  private:
1291  clutils::CLEnv &env;
1292  clutils::CLEnvInfo<1> info;
1293  cl::Context context;
1294  cl::CommandQueue queue;
1295  cl::Kernel kernel;
1296  cl::NDRange global;
1297  Staging staging;
1298  unsigned int m, d;
1299  unsigned int bufferInMSize, bufferInTSize, bufferOutSize;
1300  cl::Buffer hBufferInM, hBufferInT, hBufferOut;
1301  cl::Buffer dBufferInM, dBufferInT, dBufferOut;
1302 
1303  public:
    // Profiling variant of run(): launches `kernel` over `global` without an
    // explicit local size (cl::NullRange), waits on the timer and returns
    // `timer.duration ()`.
    //   timer  - GPU timer whose event is attached to the launch.
    //   events - optional wait list for the launch.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
1311  template <typename period>
1312  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
1313  {
1314  queue.enqueueNDRangeKernel (kernel, cl::NullRange, global, cl::NullRange, events, &timer.event ());
1315  queue.flush (); timer.wait ();
1316 
1317  return timer.duration ();
1318  }
1319 
1320  };
1321 
1322 
// Explicit specialization whose constructor is `ICPTransform`.
// NOTE(review): the class-head line (original line 1348) is missing from this
// listing. Being the second of two specializations it is presumably
// `class ICPTransform<ICPTransformConfig::MATRIX>` - confirm and restore.
// The class wraps a single OpenCL kernel with two inputs (M, T) and one output,
// all cl_float on the host side. Only the timed `run` overload is defined here.
1347  template <>
1349  {
1350  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
1355  enum class Memory : uint8_t
1356  {
1357  H_IN_M,
1358  H_IN_T,
1364  H_OUT,
1365  D_IN_M,
1366  D_IN_T,
1372  D_OUT
1373  };
1374 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
1376  ICPTransform (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
1378  cl::Memory& get (ICPTransform::Memory mem);
    // `_m` is the only size argument; `_staging` defaults to Staging::IO.
1380  void init (unsigned int _m, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN_M); non-blocking by default.
1382  void write (ICPTransform::Memory mem = ICPTransform::Memory::D_IN_M, void *ptr = nullptr, bool block = CL_FALSE,
1383  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT); blocking by default. Returns an untyped pointer.
1385  void* read (ICPTransform::Memory mem = ICPTransform::Memory::H_OUT, bool block = CL_TRUE,
1386  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
1388  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
1389 
    // Public host pointers: two inputs and one output.
1390  cl_float *hPtrInM;
1391  cl_float *hPtrInT;
1392  cl_float *hPtrOut;
1394  private:
1395  clutils::CLEnv &env;
1396  clutils::CLEnvInfo<1> info;
1397  cl::Context context;
1398  cl::CommandQueue queue;
1399  cl::Kernel kernel;
1400  cl::NDRange global;
1401  Staging staging;
1402  unsigned int m, d;
1403  unsigned int bufferInMSize, bufferInTSize, bufferOutSize;
1404  cl::Buffer hBufferInM, hBufferInT, hBufferOut;
1405  cl::Buffer dBufferInM, dBufferInT, dBufferOut;
1406 
1407  public:
    // Profiling variant of run(): launches `kernel` over `global` without an
    // explicit local size (cl::NullRange), waits on the timer and returns
    // `timer.duration ()`.
    //   timer  - GPU timer whose event is attached to the launch.
    //   events - optional wait list for the launch.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
1415  template <typename period>
1416  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
1417  {
1418  queue.enqueueNDRangeKernel (kernel, cl::NullRange, global, cl::NullRange, events, &timer.event ());
1419  queue.flush (); timer.wait ();
1420 
1421  return timer.duration ();
1422  }
1423 
1424  };
1425 
1426 
// Body of the class whose constructor is `ICPPowerMethod`.
// NOTE(review): the class-head line (original line 1451) is missing from this
// listing; it presumably reads `class ICPPowerMethod` - restore it from the real header.
// The class wraps a single OpenCL kernel with two inputs (S, MEAN) and one
// output (T_K), all cl_float on the host side. Only the timed `run` overload is
// defined in this listing.
1452  {
1453  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
1458  enum class Memory : uint8_t
1459  {
1460  H_IN_S,
1463  H_IN_MEAN,
1465  H_OUT_T_K,
1473  D_IN_S,
1476  D_IN_MEAN,
1478  D_OUT_T_K
1486  };
1487 
    // Binds the object to an existing CL environment (`_env`, kept by reference)
    // and an environment-info descriptor (`_info`, kept by value).
1489  ICPPowerMethod (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _info);
    // Returns a reference to the memory object identified by `mem`.
1491  cl::Memory& get (ICPPowerMethod::Memory mem);
    // Takes no problem-size arguments, only the staging mode (default Staging::IO).
1493  void init (Staging _staging = Staging::IO);
    // NOTE(review): the opening line of this declaration (original line 1495) is
    // missing from this listing; by analogy with the sibling classes it is
    // presumably `void write (ICPPowerMethod::Memory mem = ...,` - restore it.
1496  void *ptr = nullptr, bool block = CL_FALSE,
1497  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_OUT_T_K); blocking by default. Returns an untyped pointer.
1499  void* read (ICPPowerMethod::Memory mem = ICPPowerMethod::Memory::H_OUT_T_K, bool block = CL_TRUE,
1500  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; optional wait list and completion event.
1502  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
1503 
    // Public host pointers: two inputs and one output.
1504  cl_float *hPtrInS;
1505  cl_float *hPtrInMean;
1506  cl_float *hPtrOutTk;
1508  private:
1509  clutils::CLEnv &env;
1510  clutils::CLEnvInfo<1> info;
1511  cl::Context context;
1512  cl::CommandQueue queue;
1513  cl::Kernel kernel;
1514  cl::NDRange global;
1515  Staging staging;
1516  unsigned int bufferInSSize, bufferInMeanSize, bufferOutTkSize;
1517  cl::Buffer hBufferInS, hBufferInMean, hBufferOutTk;
1518  cl::Buffer dBufferInS, dBufferInMean, dBufferOutTk;
1519 
1520  public:
    // Profiling variant of run(): enqueues `kernel` with enqueueTask (not as an
    // NDRange, so the `global` member is not used here), waits on the timer and
    // returns `timer.duration ()`.
    //   timer  - GPU timer whose event is attached to the task.
    //   events - optional wait list for the task.
    // NOTE(review): the unit of the result presumably follows `period` - confirm
    // against clutils::GPUTimer.
1528  template <typename period>
1529  double run (clutils::GPUTimer<period> &timer, const std::vector<cl::Event> *events = nullptr)
1530  {
1531  queue.enqueueTask (kernel, events, &timer.event ());
1532  queue.flush (); timer.wait ();
1533 
1534  return timer.duration ();
1535  }
1536 
1537  };
1538 
1539 
// First compile-time selector of `ICPStep` (template parameter `CR`).
// NOTE(review): the names suggest the method used to estimate the rotation
// (Eigen library, power method, Jacobi) - confirm against the specializations.
1544  enum class ICPStepConfigT : uint8_t
1545  {
1546  EIGEN,
1549  POWER_METHOD,
1553  JACOBI
1556  };
1557 
1558 
// Second compile-time selector of `ICPStep` (template parameter `CW`).
1560  enum class ICPStepConfigW : uint8_t
1561  {
1562  REGULAR,
1563  WEIGHTED
1564  };
1565 
1566 
// Primary template: declared only, so an `ICPStep` instantiation is usable
// only through an explicit specialization.
1582  template <ICPStepConfigT CR, ICPStepConfigW CW>
1583  class ICPStep;
1584 
1585 
// Explicit specialization whose constructor is `ICPStep`.
// NOTE(review): the class-head line (original line 1614) is missing from this
// listing. Since run() uses Eigen's SVD and the REGULAR variants of ICPMean/ICPS,
// it is presumably `class ICPStep<ICPStepConfigT::EIGEN, ICPStepConfigW::REGULAR>`
// - confirm and restore it from the real header.
// The timed run() chains several helper objects (transform, rbcS, means, devs,
// matrixS), then estimates a rotation/translation/scale on the host with Eigen
// and accumulates it into R/q/t/s.
1613  template <>
1615  {
1616  public:
    // Names the memory objects that get()/write()/read() operate on.
    // NOTE(review): H_* / D_* presumably denote host- and device-side buffers,
    // matching the hBuffer*/dBuffer* members below - confirm against the definitions.
1621  enum class Memory : uint8_t
1622  {
1623  H_IN_F,
1624  H_IN_M,
1625  H_IO_T,
1635  D_IN_F,
1636  D_IN_M,
1637  D_IO_T,
1647  };
1648 
    // Takes two environment-info descriptors, one stored as `infoRBC` and one as `infoICP`.
1650  ICPStep (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _infoRBC, clutils::CLEnvInfo<1> _infoICP);
    // Returns a reference to the memory object identified by `mem`.
1652  cl::Memory& get (ICPStep::Memory mem);
    // `_m` and `_nr` are size arguments; `_a` defaults to 1e2f and `_c` to 1e-6f;
    // `_staging` defaults to Staging::IO.
    // NOTE(review): `_a` / `_c` presumably initialise the values exposed by
    // getAlpha()/getScaling() - confirm.
1654  void init (unsigned int _m, unsigned int _nr,
1655  float _a = 1e2f, float _c = 1e-6f, Staging _staging = Staging::IO);
    // Transfer towards `mem` (default D_IN_F); non-blocking by default.
1657  void write (ICPStep::Memory mem = ICPStep::Memory::D_IN_F, void *ptr = nullptr, bool block = CL_FALSE,
1658  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Transfer from `mem` (default H_IO_T); blocking by default. Returns an untyped pointer.
1660  void* read (ICPStep::Memory mem = ICPStep::Memory::H_IO_T, bool block = CL_TRUE,
1661  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Separate entry point for the RBC part; optional wait list and completion event.
1663  void buildRBC (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
    // Untimed execution; `config` is an extra flag defaulting to false.
1665  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr, bool config = false);
    // Accessor pairs for two float parameters.
1668  float getAlpha ();
1671  void setAlpha (float _a);
1673  float getScaling ();
1675  void setScaling (float _c);
1676 
    // Public host pointers; hPtrIOT is rewritten by the timed run() with 8 floats
    // (quaternion coefficients, translation, scale).
1677  cl_float *hPtrInF;
1678  cl_float *hPtrInM;
1679  cl_float *hPtrIOT;
    // Rk/qk/tk/sk: rotation (matrix and quaternion), translation and scale
    // estimated by the most recent timed run().
1682  Eigen::Matrix3f Rk;
1684  Eigen::Quaternionf qk;
1687  Eigen::Vector3f tk;
1689  cl_float sk;
    // R/q/t/s: the same quantities accumulated over successive timed run() calls.
1692  Eigen::Matrix3f R;
1694  Eigen::Quaternionf q;
1697  Eigen::Vector3f t;
1699  cl_float s;
1702  protected:
1703  clutils::CLEnv &env;
1704  clutils::CLEnvInfo<1> infoRBC, infoICP;
1705  cl::Context context;
1706  cl::CommandQueue queue;
    // NOTE(review): original lines 1707-1708 and 1715-1717 are absent from this
    // listing; the members `transform`, `means`, `devs` and `matrixS` used by the
    // timed run() are presumably declared there - confirm.
1709  RBC::RBCConstruct
1710  <RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC> rbcC;
1712  RBC::RBCSearch
1713  <RBC::KernelTypeC::KINECT_R,
1714  RBC::RBCPermuteConfig::GENERIC, RBC::KernelTypeS::KINECT> rbcS;
1718 
    // Scratch state filled by the timed run() from the readbacks.
1719  cl_float *mean, *Sij;
1720  Eigen::Vector3f mf, mm;
1721  Eigen::Matrix3f S;
1722 
1723  float a, c;
1724  unsigned int m, nr, d;
1725  unsigned int bufferFMSize, bufferTSize;
1726  cl::Buffer hBufferInF, hBufferInM, hBufferIOT;
1727  cl::Buffer dBufferInF, dBufferInM, dBufferIOT;
1728 
1729  public:
    // Profiling variant of run(): executes one full step and returns the summed
    // time of all stages.
    //   timer  - GPU timer passed to every helper and used for the final write.
    //   events - optional wait list; only `transform.run` receives it.
    //   config - forwarded unchanged to `rbcS.run`.
    // Stages: transform, rbcS, means, devs, matrixS on the device; then a host
    // section (timed with a CPU timer) that estimates Rk/tk/sk, updates R/q/t/s
    // and fills hPtrIOT; finally a timed write of hPtrIOT to dBufferIOT.
    // NOTE(review): `cTimer` is fixed to std::milli while the GPU timings follow
    // `period`; the sum presumably mixes units unless period is std::milli - confirm.
1739  template <typename period>
1740  double run (clutils::GPUTimer<period> &timer,
1741  const std::vector<cl::Event> *events = nullptr, bool config = false)
1742  {
1743  clutils::CPUTimer<double, std::milli> cTimer;
1744  double pTime = 0.0;
1745 
1746  pTime += transform.run (timer, events);
1747  pTime += rbcS.run (timer, nullptr, config);
1748  pTime += means.run (timer);
1749  pTime += devs.run (timer);
1750  pTime += matrixS.run (timer);
1751 
1752  cTimer.start ();
1753 
    // The first read is requested non-blocking, the second uses the default
    // (blocking) mode.
    // NOTE(review): `mean` is dereferenced right after; this presumably relies on
    // both reads being ordered on the same in-order queue - confirm.
1754  mean = (cl_float *) means.read (ICPMean<ICPMeanConfig::REGULAR>::Memory::H_OUT, CL_FALSE);
1755  Sij = (cl_float *) matrixS.read (ICPS<ICPSConfig::REGULAR>::Memory::H_OUT);
    // Scale estimate from elements 9 and 10 of the readback (no guard against Sij[10] == 0).
1756  sk = std::sqrt (Sij[9] / Sij[10]);
1757 
    // mf comes from mean[0..2], mm from mean[4..6].
1758  mf = Eigen::Map<Eigen::Vector3f> (mean);
1759  mm = Eigen::Map<Eigen::Vector3f> (mean + 4);
    // Stride<1, 3>: element (i, j) of S is taken from Sij[3 * i + j].
1760  S = Eigen::Map<Eigen::Matrix3f, Eigen::Unaligned, Eigen::Stride<1, 3> > (Sij);
1761 
1762  Eigen::JacobiSVD<Eigen::MatrixXf, Eigen::NoQRPreconditioner>
1763  svd (S, Eigen::ComputeThinU | Eigen::ComputeThinV);
1764 
1765  Rk = svd.matrixV () * svd.matrixU ().transpose ();
    // A negative determinant means V * U^T is a reflection; recompute with
    // B = diag (1, 1, det) so the product has a positive determinant.
1766  if (Rk.determinant () < 0)
1767  {
1768  Eigen::Matrix3f B = Eigen::Matrix3f::Identity ();
1769  B (2, 2) = Rk.determinant ();
1770  Rk = svd.matrixV () * B * svd.matrixU ().transpose ();
1771  }
1772  qk = Eigen::Quaternionf (Rk);
1773 
1774  tk = mf - sk * Rk * mm;
1775 
    // Fold this step's estimate into the accumulated transformation.
1776  R = Rk * R;
1777  q = Eigen::Quaternionf (R);
1778  t = sk * Rk * t + tk;
1779  s = sk * s;
1780 
    // hPtrIOT[0..3] = q coefficients, hPtrIOT[4..7] = (t, 1); the trailing 1 is
    // then overwritten with the accumulated scale.
1781  Eigen::Map<Eigen::Vector4f> (hPtrIOT, 4) = q.coeffs (); // Quaternion
1782  Eigen::Map<Eigen::Vector4f> (hPtrIOT + 4, 4) = t.homogeneous (); // Translation
1783  hPtrIOT[7] = s; // Scale
1784 
1785  pTime += cTimer.stop ();
1786 
    // Non-blocking write of the updated parameters; waited on through the timer.
1787  queue.enqueueWriteBuffer (dBufferIOT, CL_FALSE, 0, bufferTSize, hPtrIOT, nullptr, &timer.event ());
1788  queue.flush (); timer.wait ();
1789  pTime += timer.duration ();
1790 
1791  return pTime;
1792  }
1793 
1794  };
1795 
1796 
// Explicit (full) specialization of the ICPStep interface class.
// NOTE(review): this listing skips several original lines, presumably including the
// class head (1825) and the declarations of the transform / weights / means / devs /
// matrixS members used by the timed run() below (numbering jumps 1925 -> 1930).
// Confirm against the original header.
// As far as the timed run() shows, this variant runs a weights stage and recovers
// the per-step rotation on the host with an Eigen Jacobi SVD.
1824  template <>
1826  {
1827  public:
// Identifiers for the memory objects handled by the class. The H_* / D_* names
// mirror the hBuffer* / dBuffer* members declared further down.
1832  enum class Memory : uint8_t
1833  {
1834  H_IN_F,
1835  H_IN_M,
1836  H_IO_T,
1846  D_IN_F,
1847  D_IN_M,
1848  D_IO_T,
1858  };
1859 
// Constructor: takes the CL environment and two separate CLEnvInfo objects,
// one for the RBC part and one for the ICP part.
1861  ICPStep (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _infoRBC, clutils::CLEnvInfo<1> _infoICP);
// Returns a reference to the memory object identified by mem.
1863  cl::Memory& get (ICPStep::Memory mem);
// Configures the instance. _a defaults to 1e2f and _c to 1e-6f.
// NOTE(review): the meaning of _m and _nr is not visible here (presumably set
// sizes, cf. members m and nr) — confirm in algorithms.cpp.
1865  void init (unsigned int _m, unsigned int _nr,
1866  float _a = 1e2f, float _c = 1e-6f, Staging _staging = Staging::IO);
// Data transfer towards a device buffer; defaults to D_IN_F and non-blocking.
1868  void write (ICPStep::Memory mem = ICPStep::Memory::D_IN_F, void *ptr = nullptr, bool block = CL_FALSE,
1869  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Data transfer towards a staging buffer; defaults to H_IO_T and blocking.
1871  void* read (ICPStep::Memory mem = ICPStep::Memory::H_IO_T, bool block = CL_TRUE,
1872  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Builds the RBC data structure.
1874  void buildRBC (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Untimed counterpart of the templated run() defined at the bottom of the class.
1876  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr, bool config = false);
// Accessors; presumably for the members a and c respectively — confirm in algorithms.cpp.
1879  float getAlpha ();
1882  void setAlpha (float _a);
1884  float getScaling ();
1886  void setScaling (float _c);
1887 
// Host-side pointers. hPtrIOT is written by run() as 8 floats:
// [0..3] quaternion coefficients, [4..6] translation, [7] scale.
1888  cl_float *hPtrInF;
1889  cl_float *hPtrInM;
1890  cl_float *hPtrIOT;
// Per-step (incremental) estimate produced by the latest run(): rotation matrix,
// the same rotation as a quaternion, translation and scale.
1893  Eigen::Matrix3f Rk;
1895  Eigen::Quaternionf qk;
1898  Eigen::Vector3f tk;
1900  cl_float sk;
// Accumulated transformation; run() composes each per-step estimate into these.
1903  Eigen::Matrix3f R;
1905  Eigen::Quaternionf q;
1908  Eigen::Vector3f t;
1910  cl_float s;
1913  protected:
1914  clutils::CLEnv &env;
1915  clutils::CLEnvInfo<1> infoRBC, infoICP;
1916  cl::Context context;
1917  cl::CommandQueue queue;
// RBC construction and search stages.
1920  RBC::RBCConstruct
1921  <RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC> rbcC;
1923  RBC::RBCSearch
1924  <RBC::KernelTypeC::KINECT_R,
1925  RBC::RBCPermuteConfig::GENERIC, RBC::KernelTypeS::KINECT> rbcS;
1930 
// Host pointers to the results read back from the means and matrixS stages,
// and the Eigen values derived from them in run().
1931  cl_float *mean, *Sij;
1932  Eigen::Vector3f mf, mm;
1933  Eigen::Matrix3f S;
1934 
1935  float a, c;
1936  unsigned int m, nr, d;
1937  unsigned int bufferFMSize, bufferTSize;
1938  cl::Buffer hBufferInF, hBufferInM, hBufferIOT;
1939  cl::Buffer dBufferInF, dBufferInM, dBufferIOT;
1940 
1941  public:
// Timed version of run(): executes one ICP step and returns the accumulated time.
//   timer  - GPU timer handed to every stage and to the final buffer write.
//   events - wait list forwarded to the first stage (transform) only.
//   config - forwarded to rbcS.run().
// Returns the sum of the stage times, the host computation time and the time of
// the final device write.
// NOTE(review): the host part is measured with a std::milli CPU timer while the
// GPU timer's period is a template parameter; if period is not milli the sum
// presumably mixes units — confirm.
1951  template <typename period>
1952  double run (clutils::GPUTimer<period> &timer,
1953  const std::vector<cl::Event> *events = nullptr, bool config = false)
1954  {
1955  clutils::CPUTimer<double, std::milli> cTimer;
1956  double pTime = 0.0;
1957 
// Stage pipeline; each stage returns its own measured time.
1958  pTime += transform.run (timer, events);
1959  pTime += rbcS.run (timer, nullptr, config);
1960  pTime += weights.run (timer);
1961  pTime += means.run (timer);
1962  pTime += devs.run (timer);
1963  pTime += matrixS.run (timer);
1964 
1965  cTimer.start ();
1966 
// The means read is non-blocking; the matrixS read uses the default (blocking).
// NOTE(review): mean is dereferenced below, so this relies on the means transfer
// having completed by the time the matrixS read returns — presumably both use
// the same in-order queue; confirm.
1967  mean = (cl_float *) means.read (ICPMean<ICPMeanConfig::WEIGHTED>::Memory::H_OUT, CL_FALSE);
1968  Sij = (cl_float *) matrixS.read (ICPS<ICPSConfig::WEIGHTED>::Memory::H_OUT);
// Per-step scale from elements 9 and 10 of the matrixS output.
// NOTE(review): no guard against Sij[10] == 0 or a negative ratio.
1969  sk = std::sqrt (Sij[9] / Sij[10]);
1970 
// mean[0..2] and mean[4..6] become the two mean vectors mf and mm.
1971  mf = Eigen::Map<Eigen::Vector3f> (mean);
1972  mm = Eigen::Map<Eigen::Vector3f> (mean + 4);
// Stride<1, 3> (outer 1, inner 3) makes the map read Sij[0..8] in row-major order.
1973  S = Eigen::Map<Eigen::Matrix3f, Eigen::Unaligned, Eigen::Stride<1, 3> > (Sij);
1974 
// Thin U/V are only available for dynamic-size matrices in Eigen, hence MatrixXf.
1975  Eigen::JacobiSVD<Eigen::MatrixXf, Eigen::NoQRPreconditioner>
1976  svd (S, Eigen::ComputeThinU | Eigen::ComputeThinV);
1977 
// Rotation from the SVD factors: Rk = V * U^T.
1978  Rk = svd.matrixV () * svd.matrixU ().transpose ();
// A negative determinant means V * U^T is a reflection; recompute with
// B = diag (1, 1, det) so that the result is a proper rotation.
1979  if (Rk.determinant () < 0)
1980  {
1981  Eigen::Matrix3f B = Eigen::Matrix3f::Identity ();
1982  B (2, 2) = Rk.determinant ();
1983  Rk = svd.matrixV () * B * svd.matrixU ().transpose ();
1984  }
1985  qk = Eigen::Quaternionf (Rk);
1986 
// Per-step translation.
1987  tk = mf - sk * Rk * mm;
1988 
// Compose the per-step estimate into the accumulated transformation.
1989  R = Rk * R;
1990  q = Eigen::Quaternionf (R);
1991  t = sk * Rk * t + tk;
1992  s = sk * s;
1993 
// Pack into hPtrIOT. homogeneous () writes 1 into element 7, which the next
// statement overwrites with the accumulated scale.
1994  Eigen::Map<Eigen::Vector4f> (hPtrIOT, 4) = q.coeffs (); // Quaternion
1995  Eigen::Map<Eigen::Vector4f> (hPtrIOT + 4, 4) = t.homogeneous (); // Translation
1996  hPtrIOT[7] = s; // Scale
1997 
1998  pTime += cTimer.stop ();
1999 
// Non-blocking upload of the updated transformation; flush and wait on the
// timer's event so that its duration can be added.
2000  queue.enqueueWriteBuffer (dBufferIOT, CL_FALSE, 0, bufferTSize, hPtrIOT, nullptr, &timer.event ());
2001  queue.flush (); timer.wait ();
2002  pTime += timer.duration ();
2003 
2004  return pTime;
2005  }
2006 
2007  };
2008 
2009 
// Explicit (full) specialization of the ICPStep interface class.
// NOTE(review): this listing skips several original lines, presumably including the
// class head (2038) and the declarations of the transform / means / devs / matrixS /
// powMethod members used by the timed run() below (numbering jumps 2138 -> 2143).
// Confirm against the original header.
// As far as the timed run() shows, this variant has no weights stage and obtains
// the per-step transformation from the powMethod stage instead of a host-side SVD.
2037  template <>
2039  {
2040  public:
// Identifiers for the memory objects handled by the class. The H_* / D_* names
// mirror the hBuffer* / dBuffer* members declared further down.
2045  enum class Memory : uint8_t
2046  {
2047  H_IN_F,
2048  H_IN_M,
2049  H_IO_T,
2059  D_IN_F,
2060  D_IN_M,
2061  D_IO_T,
2071  };
2072 
// Constructor: takes the CL environment and two separate CLEnvInfo objects,
// one for the RBC part and one for the ICP part.
2074  ICPStep (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _infoRBC, clutils::CLEnvInfo<1> _infoICP);
// Returns a reference to the memory object identified by mem.
2076  cl::Memory& get (ICPStep::Memory mem);
// Configures the instance. _a defaults to 1e2f and _c to 1e-6f.
// NOTE(review): the meaning of _m and _nr is not visible here (presumably set
// sizes, cf. members m and nr) — confirm in algorithms.cpp.
2078  void init (unsigned int _m, unsigned int _nr,
2079  float _a = 1e2f, float _c = 1e-6f, Staging _staging = Staging::IO);
// Data transfer towards a device buffer; defaults to D_IN_F and non-blocking.
2081  void write (ICPStep::Memory mem = ICPStep::Memory::D_IN_F, void *ptr = nullptr, bool block = CL_FALSE,
2082  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Data transfer towards a staging buffer; defaults to H_IO_T and blocking.
2084  void* read (ICPStep::Memory mem = ICPStep::Memory::H_IO_T, bool block = CL_TRUE,
2085  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Builds the RBC data structure.
2087  void buildRBC (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Untimed counterpart of the templated run() defined at the bottom of the class.
2089  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr, bool config = false);
// Accessors; presumably for the members a and c respectively — confirm in algorithms.cpp.
2092  float getAlpha ();
2095  void setAlpha (float _a);
2097  float getScaling ();
2099  void setScaling (float _c);
2100 
// Host-side pointers. hPtrIOT is written by run() as 8 floats:
// [0..3] quaternion coefficients, [4..6] translation, [7] scale.
2101  cl_float *hPtrInF;
2102  cl_float *hPtrInM;
2103  cl_float *hPtrIOT;
// Per-step (incremental) estimate produced by the latest run(): rotation matrix,
// the same rotation as a quaternion, translation and scale.
2106  Eigen::Matrix3f Rk;
2108  Eigen::Quaternionf qk;
2111  Eigen::Vector3f tk;
2113  cl_float sk;
// Accumulated transformation; run() composes each per-step estimate into these.
2116  Eigen::Matrix3f R;
2118  Eigen::Quaternionf q;
2121  Eigen::Vector3f t;
2123  cl_float s;
2126  protected:
2127  clutils::CLEnv &env;
2128  clutils::CLEnvInfo<1> infoRBC, infoICP;
2129  cl::Context context;
2130  cl::CommandQueue queue;
// RBC construction and search stages.
2133  RBC::RBCConstruct
2134  <RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC> rbcC;
2136  RBC::RBCSearch
2137  <RBC::KernelTypeC::KINECT_R,
2138  RBC::RBCPermuteConfig::GENERIC, RBC::KernelTypeS::KINECT> rbcS;
2143 
// Host pointer to the per-step transformation read back from powMethod in run().
2144  cl_float *Tk;
2145 
2146  float a, c;
2147  unsigned int m, nr, d;
2148  unsigned int bufferFMSize, bufferTSize;
2149  cl::Buffer hBufferInF, hBufferInM, hBufferIOT;
2150  cl::Buffer dBufferInF, dBufferInM, dBufferIOT;
2151 
2152  public:
// Timed version of run(): executes one ICP step and returns the accumulated time.
//   timer  - GPU timer handed to every stage and to the final buffer write.
//   events - wait list forwarded to the first stage (transform) only.
//   config - forwarded to rbcS.run().
// Returns the sum of the stage times, the host computation time and the time of
// the final device write.
// NOTE(review): the host part is measured with a std::milli CPU timer while the
// GPU timer's period is a template parameter; if period is not milli the sum
// presumably mixes units — confirm.
2162  template <typename period>
2163  double run (clutils::GPUTimer<period> &timer,
2164  const std::vector<cl::Event> *events = nullptr, bool config = false)
2165  {
2166  clutils::CPUTimer<double, std::milli> cTimer;
2167  double pTime = 0.0;
2168 
// Stage pipeline; each stage returns its own measured time.
2169  pTime += transform.run (timer, events);
2170  pTime += rbcS.run (timer, nullptr, config);
2171  pTime += means.run (timer);
2172  pTime += devs.run (timer);
2173  pTime += matrixS.run (timer);
2174  pTime += powMethod.run (timer);
2175 
2176  cTimer.start ();
2177 
// Read (blocking by default) of the per-step transformation.
2178  Tk = (cl_float *) powMethod.read (ICPPowerMethod::Memory::H_OUT_T_K);
2179 
// Unpack Tk: [0..3] quaternion coefficients (Eigen's pointer constructor reads
// them as x, y, z, w), [4..6] translation, [7] scale.
2180  qk = Eigen::Quaternionf (Tk);
2181  Rk = Eigen::Matrix3f (qk);
2182  tk = Eigen::Map<Eigen::Vector3f> (Tk + 4, 3);
2183  sk = Tk[7];
2184 
// Compose the per-step estimate into the accumulated transformation.
2185  R = Rk * R;
2186  q = Eigen::Quaternionf (R);
2187  t = sk * Rk * t + tk;
2188  s = sk * s;
2189 
// Pack into hPtrIOT. homogeneous () writes 1 into element 7, which the next
// statement overwrites with the accumulated scale.
2190  Eigen::Map<Eigen::Vector4f> (hPtrIOT, 4) = q.coeffs (); // Quaternion
2191  Eigen::Map<Eigen::Vector4f> (hPtrIOT + 4, 4) = t.homogeneous (); // Translation
2192  hPtrIOT[7] = s; // Scale
2193 
2194  pTime += cTimer.stop ();
2195 
// Non-blocking upload of the updated transformation; flush and wait on the
// timer's event so that its duration can be added.
2196  queue.enqueueWriteBuffer (dBufferIOT, CL_FALSE, 0, bufferTSize, hPtrIOT, nullptr, &timer.event ());
2197  queue.flush (); timer.wait ();
2198  pTime += timer.duration ();
2199 
2200  return pTime;
2201  }
2202 
2203  };
2204 
2205 
// Explicit (full) specialization of the ICPStep interface class.
// NOTE(review): this listing skips several original lines, presumably including the
// class head (2234) and the declarations of the transform / weights / means / devs /
// matrixS / powMethod members used by the timed run() below (numbering jumps
// 2334 -> 2340). Confirm against the original header.
// As far as the timed run() shows, this variant runs a weights stage and obtains
// the per-step transformation from the powMethod stage instead of a host-side SVD.
2233  template <>
2235  {
2236  public:
// Identifiers for the memory objects handled by the class. The H_* / D_* names
// mirror the hBuffer* / dBuffer* members declared further down.
2241  enum class Memory : uint8_t
2242  {
2243  H_IN_F,
2244  H_IN_M,
2245  H_IO_T,
2255  D_IN_F,
2256  D_IN_M,
2257  D_IO_T,
2267  };
2268 
// Constructor: takes the CL environment and two separate CLEnvInfo objects,
// one for the RBC part and one for the ICP part.
2270  ICPStep (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _infoRBC, clutils::CLEnvInfo<1> _infoICP);
// Returns a reference to the memory object identified by mem.
2272  cl::Memory& get (ICPStep::Memory mem);
// Configures the instance. _a defaults to 1e2f and _c to 1e-6f.
// NOTE(review): the meaning of _m and _nr is not visible here (presumably set
// sizes, cf. members m and nr) — confirm in algorithms.cpp.
2274  void init (unsigned int _m, unsigned int _nr,
2275  float _a = 1e2f, float _c = 1e-6f, Staging _staging = Staging::IO);
// Data transfer towards a device buffer; defaults to D_IN_F and non-blocking.
2277  void write (ICPStep::Memory mem = ICPStep::Memory::D_IN_F, void *ptr = nullptr, bool block = CL_FALSE,
2278  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Data transfer towards a staging buffer; defaults to H_IO_T and blocking.
2280  void* read (ICPStep::Memory mem = ICPStep::Memory::H_IO_T, bool block = CL_TRUE,
2281  const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Builds the RBC data structure.
2283  void buildRBC (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Untimed counterpart of the templated run() defined at the bottom of the class.
2285  void run (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr, bool config = false);
// Accessors; presumably for the members a and c respectively — confirm in algorithms.cpp.
2288  float getAlpha ();
2291  void setAlpha (float _a);
2293  float getScaling ();
2295  void setScaling (float _c);
2296 
// Host-side pointers. hPtrIOT is written by run() as 8 floats:
// [0..3] quaternion coefficients, [4..6] translation, [7] scale.
2297  cl_float *hPtrInF;
2298  cl_float *hPtrInM;
2299  cl_float *hPtrIOT;
// Per-step (incremental) estimate produced by the latest run(): rotation matrix,
// the same rotation as a quaternion, translation and scale.
2302  Eigen::Matrix3f Rk;
2304  Eigen::Quaternionf qk;
2307  Eigen::Vector3f tk;
2309  cl_float sk;
// Accumulated transformation; run() composes each per-step estimate into these.
2312  Eigen::Matrix3f R;
2314  Eigen::Quaternionf q;
2317  Eigen::Vector3f t;
2319  cl_float s;
2322  protected:
2323  clutils::CLEnv &env;
2324  clutils::CLEnvInfo<1> infoRBC, infoICP;
2325  cl::Context context;
2326  cl::CommandQueue queue;
// RBC construction and search stages.
2329  RBC::RBCConstruct
2330  <RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC> rbcC;
2332  RBC::RBCSearch
2333  <RBC::KernelTypeC::KINECT_R,
2334  RBC::RBCPermuteConfig::GENERIC, RBC::KernelTypeS::KINECT> rbcS;
2340 
// Host pointer to the per-step transformation read back from powMethod in run().
2341  cl_float *Tk;
2342 
2343  float a, c;
2344  unsigned int m, nr, d;
2345  unsigned int bufferFMSize, bufferTSize;
2346  cl::Buffer hBufferInF, hBufferInM, hBufferIOT;
2347  cl::Buffer dBufferInF, dBufferInM, dBufferIOT;
2348 
2349  public:
// Timed version of run(): executes one ICP step and returns the accumulated time.
//   timer  - GPU timer handed to every stage and to the final buffer write.
//   events - wait list forwarded to the first stage (transform) only.
//   config - forwarded to rbcS.run().
// Returns the sum of the stage times, the host computation time and the time of
// the final device write.
// NOTE(review): the host part is measured with a std::milli CPU timer while the
// GPU timer's period is a template parameter; if period is not milli the sum
// presumably mixes units — confirm.
2359  template <typename period>
2360  double run (clutils::GPUTimer<period> &timer,
2361  const std::vector<cl::Event> *events = nullptr, bool config = false)
2362  {
2363  clutils::CPUTimer<double, std::milli> cTimer;
2364  double pTime = 0.0;
2365 
// Stage pipeline; each stage returns its own measured time.
2366  pTime += transform.run (timer, events);
2367  pTime += rbcS.run (timer, nullptr, config);
2368  pTime += weights.run (timer);
2369  pTime += means.run (timer);
2370  pTime += devs.run (timer);
2371  pTime += matrixS.run (timer);
2372  pTime += powMethod.run (timer);
2373 
2374  cTimer.start ();
2375 
// Read (blocking by default) of the per-step transformation.
2376  Tk = (cl_float *) powMethod.read (ICPPowerMethod::Memory::H_OUT_T_K);
2377 
// Unpack Tk: [0..3] quaternion coefficients (Eigen's pointer constructor reads
// them as x, y, z, w), [4..6] translation, [7] scale.
2378  qk = Eigen::Quaternionf (Tk);
2379  Rk = Eigen::Matrix3f (qk);
2380  tk = Eigen::Map<Eigen::Vector3f> (Tk + 4, 3);
2381  sk = Tk[7];
2382 
// Compose the per-step estimate into the accumulated transformation.
2383  R = Rk * R;
2384  q = Eigen::Quaternionf (R);
2385  t = sk * Rk * t + tk;
2386  s = sk * s;
2387 
// Pack into hPtrIOT. homogeneous () writes 1 into element 7, which the next
// statement overwrites with the accumulated scale.
2388  Eigen::Map<Eigen::Vector4f> (hPtrIOT, 4) = q.coeffs (); // Quaternion
2389  Eigen::Map<Eigen::Vector4f> (hPtrIOT + 4, 4) = t.homogeneous (); // Translation
2390  hPtrIOT[7] = s; // Scale
2391 
2392  pTime += cTimer.stop ();
2393 
// Non-blocking upload of the updated transformation; flush and wait on the
// timer's event so that its duration can be added.
2394  queue.enqueueWriteBuffer (dBufferIOT, CL_FALSE, 0, bufferTSize, hPtrIOT, nullptr, &timer.event ());
2395  queue.flush (); timer.wait ();
2396  pTime += timer.duration ();
2397 
2398  return pTime;
2399  }
2400 
2401  };
2402 
2403 
// ICP derives from ICPStep<CR, CW> and adds iteration control on top of a single
// step: a maximum iteration count plus angle and translation thresholds.
// NOTE(review): original lines 2470-2473 are not visible in this listing; they
// presumably declare the angle_threshold and translation_threshold members that
// the accessors below refer to — confirm against the original header.
2433  template <ICPStepConfigT CR, ICPStepConfigW CW>
2434  class ICP : public ICPStep<CR, CW>
2435  {
2436  public:
// Constructor: same arguments as the ICPStep constructor.
2438  ICP (clutils::CLEnv &_env, clutils::CLEnvInfo<1> _infoRBC, clutils::CLEnvInfo<1> _infoICP);
// Configures the instance. Defaults: _a = 1e2f, _c = 1e-6f, 40 iterations,
// angle threshold 0.001, translation threshold 0.01.
2440  void init (unsigned int _m, unsigned int _nr, float _a = 1e2f, float _c = 1e-6f,
2441  unsigned int _max_iterations = 40, double _angle_threshold = 0.001,
2442  double _translation_threshold = 0.01, Staging _staging = Staging::IO);
// Builds the RBC data structure.
2444  void buildRBC (const std::vector<cl::Event> *events = nullptr, cl::Event *event = nullptr);
// Untimed run; defined in algorithms.cpp.
2446  void run ();
// Accessors for the iteration limit and the two thresholds.
2448  unsigned int getMaxIterations ();
2450  void setMaxIterations (unsigned int _max_iterations);
2452  double getAngleThreshold ();
2454  void setAngleThreshold (double _angle_threshold);
2456  double getTranslationThreshold ();
2458  void setTranslationThreshold (double _translation_threshold);
2459 
// Current iteration number.
2462  unsigned int k;
2463 
2464  protected:
// Performs the convergence check.
2466  inline bool check ();
2467 
// Maximum number of iterations a registration process is allowed to perform.
2469  unsigned int max_iterations;
2474 
2475  public:
// Timed version of run(): executes exactly 40 ICP steps, records the time of
// each one, prints the profile under the label "ICP" and returns the total.
// Only the first step is run with config = true.
// NOTE(review): the step count is hard-coded to 40 (matching the size of the
// ProfilingInfo<40> object and the init() default); max_iterations and check()
// are not consulted here. Looks intended as fixed-length profiling — confirm.
2482  template <typename period>
2483  double run (clutils::GPUTimer<period> &timer)
2484  {
2485  clutils::ProfilingInfo<40> steps ("Steps");
2486 
2487  steps[0] = ICPStep<CR, CW>::run (timer, nullptr, true);
2488  for (int i = 1; i < 40; ++i)
2489  steps[i] = ICPStep<CR, CW>::run (timer);
2490 
2491  steps.print ("ICP");
2492 
2493  return steps.total ();
2494  }
2495 
2496  };
2497 
2498 }
2499 }
2500 
2501 #endif // ICP_ALGORITHMS_HPP
cl_float * hPtrInF
Definition: algorithms.hpp:783
ICPMean< ICPMeanConfig::REGULAR > means
Definition: algorithms.hpp:2139
clutils::CLEnv & env
Definition: algorithms.hpp:1703
rbc_dist_id * hPtrIn
Definition: algorithms.hpp:520
RBC::RBCSearch< RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC, RBC::KernelTypeS::KINECT > rbcS
Definition: algorithms.hpp:2334
cl_float * hPtrInW
Definition: algorithms.hpp:1145
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr, bool config=false)
Executes the necessary kernels.
Definition: algorithms.hpp:1740
RBC::RBCConstruct< RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC > rbcC
Definition: algorithms.hpp:2330
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:404
ICPWeights(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:982
ICPMean< ICPMeanConfig::WEIGHTED > means
Definition: algorithms.hpp:2336
ICPStepConfigW
Enumerates configurations for the ICPStep class.
Definition: algorithms.hpp:1560
void run()
Executes the necessary kernels.
Definition: algorithms.cpp:4807
ICPStepConfigT
Enumerates configurations for the ICPStep class.
Definition: algorithms.hpp:1544
Eigen::Quaternionf qk
Definition: algorithms.hpp:1895
cl_float * hPtrInMean
Definition: algorithms.hpp:905
Eigen::Quaternionf q
Definition: algorithms.hpp:1905
void init(Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:3007
Offers functions that are serial CPU implementations of the relevant algorithms in the ICP pipeline...
Definition: helper_funcs.hpp:47
void setTranslationThreshold(double _translation_threshold)
Sets the threshold for the change in translation.
Definition: algorithms.cpp:4890
ICPTransform< ICPTransformConfig::QUATERNION > transform
Definition: algorithms.hpp:2135
Interface class for the icpComputeReduceWeights kernel.
Definition: algorithms.hpp:485
clutils::CLEnvInfo< 1 > infoRBC
Definition: algorithms.hpp:2128
void init(Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:659
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:319
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:1529
ICPS< ICPSConfig::REGULAR > matrixS
Definition: algorithms.hpp:1717
cl_float * hPtrOutW
Definition: algorithms.hpp:521
ReduceConfig
Enumerates configurations for the Reduce class.
Definition: algorithms.hpp:52
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:1416
void * read(Scan::Memory mem=Scan::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:567
cl_float * hPtrOutTk
Definition: algorithms.hpp:1506
void init(unsigned int _n, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:1847
RBC::RBCConstruct< RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC > rbcC
Definition: algorithms.hpp:1710
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:3146
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:141
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:1780
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:591
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:2519
ICPTransformConfig
Enumerates configurations for the ICPTransform class.
Definition: algorithms.hpp:1189
void * read(ICPWeights::Memory mem=ICPWeights::Memory::H_OUT_SUM_W, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:1198
ICPTransform< ICPTransformConfig::QUATERNION > transform
Definition: algorithms.hpp:2331
cl_float * hPtrInDevM
Definition: algorithms.hpp:1030
void * read(ICPReps::Memory mem=ICPReps::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:950
cl_float * hPtrInM
Definition: algorithms.hpp:1390
Interface class for the scan kernels.
Definition: algorithms.hpp:200
cl_float * hPtrInDevF
Definition: algorithms.hpp:1031
clutils::CLEnvInfo< 1 > infoRBC
Definition: algorithms.hpp:1915
cl_float * hPtrInS
Definition: algorithms.hpp:1504
RBC::RBCConstruct< RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC > rbcC
Definition: algorithms.hpp:1921
void write(Reduce::Memory mem=Reduce::Memory::D_IN, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:258
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:1109
Interface class for the getLMs kernel.
Definition: algorithms.hpp:312
ICP(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _infoRBC, clutils::CLEnvInfo< 1 > _infoICP)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:4751
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:2751
RBC::RBCSearch< RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC, RBC::KernelTypeS::KINECT > rbcS
Definition: algorithms.hpp:1714
void buildRBC(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Builds the RBC data structure.
Definition: algorithms.cpp:4793
cl_float * hPtrOut
Definition: algorithms.hpp:1392
cl::CommandQueue queue
Definition: algorithms.hpp:1706
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:1621
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:998
void * read(ICPMean::Memory mem=ICPMean::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:1757
T * hPtrIn
Definition: algorithms.hpp:115
Interface class for calculating the S matrix and the s scale factor constituents, while considering weights.
Definition: algorithms.hpp:1102
void write(ICPWeights::Memory mem=ICPWeights::Memory::D_IN, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:1170
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:548
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:451
void init(unsigned int _nr, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:830
Offers classes which set up kernel execution parameters and provide interfaces for the handling of memory objects.
Definition: algorithms.hpp:45
void * read(ICPMean::Memory mem=ICPMean::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:1458
cl_float * hPtrIn
Definition: algorithms.hpp:342
cl_float * hPtrOutDevM
Definition: algorithms.hpp:907
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:751
double translation_threshold
Threshold for the change in translation (in mm) in the transformation.
Definition: algorithms.hpp:2473
cl_float * hPtrInM
Definition: algorithms.hpp:784
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:1224
ICPTransform< ICPTransformConfig::QUATERNION > transform
Definition: algorithms.hpp:1711
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:492
Eigen::Quaternionf qk
Definition: algorithms.hpp:1684
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:91
Declares classes used by the OpenCL interface classes in cl_algo.
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:973
cl_float * hPtrOut
Definition: algorithms.hpp:343
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr, bool config=false)
Executes the necessary kernels.
Definition: algorithms.hpp:2163
Reduce(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
void init(unsigned int _n, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:1029
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:207
ICPSConfig
Enumerates configurations for the ICPS class.
Definition: algorithms.hpp:943
cl_float * hPtrInDevF
Definition: algorithms.hpp:1144
cl_double * hPtrInSW
Definition: algorithms.hpp:786
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:2045
T * hPtrIn
Definition: algorithms.hpp:231
ICPMean< ICPMeanConfig::REGULAR > means
Definition: algorithms.hpp:1715
double getAngleThreshold()
Gets the threshold for the change in angle.
Definition: algorithms.cpp:4858
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr, bool config=false)
Executes the necessary kernels.
Definition: algorithms.hpp:2360
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:632
void init(unsigned int _cols, unsigned int _rows, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:132
ScanConfig
Enumerates configurations for the Scan class.
Definition: algorithms.hpp:169
RBC::RBCConstruct< RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC > rbcC
Definition: algorithms.hpp:2134
RBC::RBCSearch< RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC, RBC::KernelTypeS::KINECT > rbcS
Definition: algorithms.hpp:2138
clutils::CLEnvInfo< 1 > infoRBC
Definition: algorithms.hpp:2324
T * hPtrOut
Definition: algorithms.hpp:116
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:782
ICPLMs(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:621
Interface class for calculating the S matrix and the s scale factor constituents.
Definition: algorithms.hpp:962
ICPMean< ICPMeanConfig::WEIGHTED > means
Definition: algorithms.hpp:1927
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:1058
Staging
Enumerates staging buffer configurations.
Definition: common.hpp:43
RBC::RBCSearch< RBC::KernelTypeC::KINECT_R, RBC::RBCPermuteConfig::GENERIC, RBC::KernelTypeS::KINECT > rbcS
Definition: algorithms.hpp:1925
cl_float * hPtrInT
Definition: algorithms.hpp:1287
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:874
cl_float * hPtrInMean
Definition: algorithms.hpp:1505
cl_float * hPtrOut
Definition: algorithms.hpp:1288
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:366
cl_float * hPtrInT
Definition: algorithms.hpp:1391
ICPDevs(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:1797
void * read(Reduce::Memory mem=Reduce::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:287
Interface class for the icpTransform kernels.
Definition: algorithms.hpp:1211
Scan(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:2251
ICPMeanConfig
Enumerates configurations for the ICPMean class.
Definition: algorithms.hpp:576
cl_float * hPtrOut
Definition: algorithms.hpp:662
Eigen::Quaternionf q
Definition: algorithms.hpp:1694
void * read(ICPDevs::Memory mem=ICPDevs::Memory::H_OUT_DEV_F, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:2003
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:1458
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:813
ICPS< ICPSConfig::REGULAR > matrixS
Definition: algorithms.hpp:2141
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:311
void init(unsigned int _m, unsigned int _nr, float _a=1e2f, float _c=1e-6f, unsigned int _max_iterations=40, double _angle_threshold=0.001, double _translation_threshold=0.01, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:4778
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:2029
cl_float * hPtrInM
Definition: algorithms.hpp:1286
cl_float * hPtrOut
Definition: algorithms.hpp:787
void write(ICPReps::Memory mem=ICPReps::Memory::D_IN, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:922
cl_float * hPtrInM
Definition: algorithms.hpp:661
Interface class for the icpMean kernel.
Definition: algorithms.hpp:625
void * read(ICPLMs::Memory mem=ICPLMs::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:759
cl_float * hPtrInDevM
Definition: algorithms.hpp:1143
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:1172
unsigned int getMaxIterations()
Gets the maximum number of iterations.
Definition: algorithms.cpp:4839
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:2241
cl::CommandQueue queue
Definition: algorithms.hpp:1917
cl_float * hPtrOutDevF
Definition: algorithms.hpp:906
void init(unsigned int _cols, unsigned int _rows, Staging _staging=Staging::IO)
Configures kernel execution parameters.
Definition: algorithms.cpp:403
unsigned int k
Current iteration number.
Definition: algorithms.hpp:2462
cl_float * hPtrInM
Definition: algorithms.hpp:904
Interface class for the icpTransform_Matrix kernel.
Definition: algorithms.hpp:1451
void write(ICPPowerMethod::Memory mem=ICPPowerMethod::Memory::D_IN_S, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:3090
double angle_threshold
Threshold for the change in angle (in degrees) in the transformation.
Definition: algorithms.hpp:2471
ICPS< ICPSConfig::WEIGHTED > matrixS
Definition: algorithms.hpp:2338
cl_float * hPtrIn
Definition: algorithms.hpp:427
cl_float * hPtrInW
Definition: algorithms.hpp:785
cl_float * hPtrOut
Definition: algorithms.hpp:1146
void * read(ICPPowerMethod::Memory mem=ICPPowerMethod::Memory::H_OUT_T_K, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:3123
Interface class for the icpTransform_Quaternion kernel.
Definition: algorithms.hpp:1240
T * hPtrOut
Definition: algorithms.hpp:232
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:257
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:1312
ICPS< ICPSConfig::WEIGHTED > matrixS
Definition: algorithms.hpp:1929
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:1355
ICPReps(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:791
void * read(ICPS::Memory mem=ICPS::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:2496
bool check()
Performs the convergence check.
Definition: algorithms.cpp:4824
double run(clutils::GPUTimer< period > &timer)
Executes the necessary kernels.
Definition: algorithms.hpp:2483
Interface class for calculating the S matrix and the s scale factor constituents, while considering r...
Definition: algorithms.hpp:991
void write(Scan::Memory mem=Scan::Memory::D_IN, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:538
unsigned int max_iterations
Maximum number of iterations that a registration process is allowed to perform.
Definition: algorithms.hpp:2469
void write(ICPDevs::Memory mem=ICPDevs::Memory::D_IN_F, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:1965
ICPPowerMethod(clutils::CLEnv &_env, clutils::CLEnvInfo< 1 > _info)
Configures an OpenCL environment as specified by _info.
Definition: algorithms.cpp:2966
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:1832
Interface class for the ICP pipeline.
Definition: algorithms.hpp:1583
void run(const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Executes the necessary kernels.
Definition: algorithms.cpp:1481
cl_float * hPtrInF
Definition: algorithms.hpp:660
Interface class for the reduce kernels.
Definition: algorithms.hpp:84
double getTranslationThreshold()
Gets the threshold for the change in translation.
Definition: algorithms.cpp:4878
void write(ICPLMs::Memory mem=ICPLMs::Memory::D_IN, void *ptr=nullptr, bool block=CL_FALSE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a device buffer.
Definition: algorithms.cpp:731
Interface class for the icpSubtractMean kernel.
Definition: algorithms.hpp:867
cl_double * hPtrOutSW
Definition: algorithms.hpp:522
ICPTransform< ICPTransformConfig::QUATERNION > transform
Definition: algorithms.hpp:1922
void setMaxIterations(unsigned int _max_iterations)
Sets the maximum number of iterations.
Definition: algorithms.cpp:4850
clutils::CLEnvInfo< 1 > infoRBC
Definition: algorithms.hpp:1704
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:931
cl_float * hPtrOut
Definition: algorithms.hpp:1032
void setAngleThreshold(double _angle_threshold)
Sets the threshold for the change in angle.
Definition: algorithms.cpp:4870
Interface class for the icpMean_Weighted kernel.
Definition: algorithms.hpp:744
Memory
Enumerates the memory objects handled by the class.
Definition: algorithms.hpp:1247
cl_float * hPtrOut
Definition: algorithms.hpp:428
Interface class for the getReps kernel.
Definition: algorithms.hpp:397
Interface class for the calculation of the fixed and moving set means.
Definition: algorithms.hpp:596
cl_float * hPtrInF
Definition: algorithms.hpp:903
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr)
Executes the necessary kernels.
Definition: algorithms.hpp:687
double run(clutils::GPUTimer< period > &timer, const std::vector< cl::Event > *events=nullptr, bool config=false)
Executes the necessary kernels.
Definition: algorithms.hpp:1952
void * read(ICPS::Memory mem=ICPS::Memory::H_OUT, bool block=CL_TRUE, const std::vector< cl::Event > *events=nullptr, cl::Event *event=nullptr)
Performs a data transfer to a staging buffer.
Definition: algorithms.cpp:2228