Add zlmediakit.

Author: luocai
Date: 2024-03-13 18:01:36 +08:00
Parent: f34bfbc22f
Commit: 80b76f410e
260 changed files with 98240 additions and 23 deletions


@@ -4,7 +4,9 @@
         "name": "Linux",
         "includePath": [
             "${workspaceFolder}/**",
+            "${workspaceFolder}/3rdparty/ds_pedestrian_mot_hisi/include",
             "/opt/Libraries/boost_1_84_0/include",
+            "/opt/aarch64-v01c01-linux-gnu-gcc/lib/ZLMediaKit/include",
             "build/_deps/kylin-src/Universal"
         ],
         "defines": [],


@@ -0,0 +1,140 @@
/*
* @Author: Alfred Xiang Wu
* @Date: 2022-11-20 22:18:49
* @Brief:
* @Last Modified by: Alfred Xiang Wu
* @Last Modified time: 2023-09-19 22:32:56
*/
#ifndef _DS_PEDESTRIAN_MOT_HISI_H_
#define _DS_PEDESTRIAN_MOT_HISI_H_
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <stdint.h> // int64_t
#define RW_LICENSE_CHECK
#if defined _WINDOWS && !(defined _LIB)
#ifdef DS_PEDESTRIAN_MOT_HISI_EXPORTS
#define DS_PEDESTRIAN_MOT_HISI_API __declspec(dllexport)
#else
#define DS_PEDESTRIAN_MOT_HISI_API __declspec(dllimport)
#endif
#else
#define DS_PEDESTRIAN_MOT_HISI_API
#endif
typedef void* DS_PEDESTRIAN_MOT_HANDLE;
// for detection
typedef struct PedestrianRect {
float xmin;
float ymin;
float xmax;
float ymax;
float score;
int label;
} PedestrianRect;
// for tracking
typedef struct io_MEASURE {
std::vector<float> state; // bbox: xmin, ymin, xmax, ymax
int frame; // frame id
float conf; // confidence for bbox
} io_MEASURE;
typedef struct io_person {
io_MEASURE head; // head bbox (don't use head bbox for any analysis.)
io_MEASURE body; // body bbox
int tracked = 0; // track state
float quality_score = 0.0; // quality scores
} io_person;
typedef struct io_TrackData {
int track_id; // track id
int track_state; // track state: 0 pre-track; +1 confirmed; -1 hibernate; -2 remove;
io_person prediction; // bbox at the current frame (If track_state != 1, prediction is empty.)
} io_TrackData;
#if defined(__cplusplus)
extern "C"
{
#endif
#ifdef RW_LICENSE_CHECK
DS_PEDESTRIAN_MOT_HISI_API void ds_pedestrian_hisi_set_lic_path(const char *path);
#endif // RW_LICENSE_CHECK
/**
* The function is used to initialize the pedestrian detection and tracking handle.
*
* @param handle the handle
* @param det_model_path The path to the pedestrian detection model.
* @param reid_model_path the path of the pedestrian tracking reid model
* @param width width of the input image
* @param height the height of the input image
* @param channels the number of channels of the input image; it must be 3.
*
* @return a status code; the created handle is returned through the handle parameter.
*/
DS_PEDESTRIAN_MOT_HISI_API int ds_pedestrian_hisi_init(
DS_PEDESTRIAN_MOT_HANDLE* handle,
char* det_model_path,
char* reid_model_path,
const int width,
const int height,
const int channels);
/**
* The function is the interface function for pedestrian detection.
*
* @param handle The handle.
* @param img the input image data.
* @param bboxes the output of the pedestrian detection model, which is a vector of PedestrianRect.
*
* @return a status code; the detected bounding boxes are returned through bboxes.
*/
DS_PEDESTRIAN_MOT_HISI_API int ds_pedestrian_det_hisi(
DS_PEDESTRIAN_MOT_HANDLE handle,
unsigned char* img,
std::vector<PedestrianRect> &bboxes);
/**
* The function is the interface function for pedestrian tracking.
*
* @param handle The handle.
* @param img the input image of the current frame.
* @param nframe the frame number of the current frame.
* @param det_bboxes the detection results of the current frame, which is a vector of PedestrianRect.
* @param tracks the output of the tracking results, which is a vector of io_TrackData.
*
* @return the number of tracks.
*/
DS_PEDESTRIAN_MOT_HISI_API int ds_pedestrian_track_hisi(
DS_PEDESTRIAN_MOT_HANDLE handle,
unsigned char* img,
const int nframe,
std::vector<PedestrianRect> det_bboxes,
std::vector<io_TrackData> &tracks);
/**
* It releases the resources allocated in the initialization function.
*
* @param handle The handle.
*
* @return a status code.
*/
DS_PEDESTRIAN_MOT_HISI_API int ds_pedestrian_hisi_release(
DS_PEDESTRIAN_MOT_HANDLE *handle);
#if defined(__cplusplus)
}
#endif
#endif // _DS_PEDESTRIAN_MOT_HISI_H_
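For orientation, a minimal usage sketch of the API above (the model paths are hypothetical, the frame source and most error handling are elided, and a zero return value is assumed to mean success):

#include "ds_pedestrian_mot_hisi.h"
#include <vector>
int main() {
    DS_PEDESTRIAN_MOT_HANDLE handle = nullptr;
    char det_model[] = "models/pedestrian_det.bin";   // hypothetical path
    char reid_model[] = "models/pedestrian_reid.bin"; // hypothetical path
    const int width = 1920, height = 1080, channels = 3;
    if (ds_pedestrian_hisi_init(&handle, det_model, reid_model, width, height, channels) != 0)
        return -1;
    std::vector<unsigned char> frame(width * height * channels); // one frame buffer, filled elsewhere
    for (int nframe = 0; nframe < 100; ++nframe) {
        // Detect pedestrians in the current frame, then feed the detections to the tracker.
        std::vector<PedestrianRect> bboxes;
        ds_pedestrian_det_hisi(handle, frame.data(), bboxes);
        std::vector<io_TrackData> tracks;
        ds_pedestrian_track_hisi(handle, frame.data(), nframe, bboxes, tracks);
        for (const auto &t : tracks) {
            if (t.track_state != 1)
                continue; // +1 = confirmed; otherwise prediction is empty
            const std::vector<float> &body = t.prediction.body.state; // xmin, ymin, xmax, ymax
            (void)body; // drawing/analysis elided
        }
    }
    ds_pedestrian_hisi_release(&handle);
    return 0;
}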

Binary file not shown.

Binary file not shown.

3rdparty/libopencv/include/opencv/cv.h (vendored new file, 73 lines)

@@ -0,0 +1,73 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_CV_H
#define OPENCV_OLD_CV_H
#if defined(_MSC_VER)
#define CV_DO_PRAGMA(x) __pragma(x)
#define __CVSTR2__(x) #x
#define __CVSTR1__(x) __CVSTR2__(x)
#define __CVMSVCLOC__ __FILE__ "("__CVSTR1__(__LINE__)") : "
#define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (__CVMSVCLOC__ _msg))
#elif defined(__GNUC__)
#define CV_DO_PRAGMA(x) _Pragma (#x)
#define CV_MSG_PRAGMA(_msg) CV_DO_PRAGMA(message (_msg))
#else
#define CV_DO_PRAGMA(x)
#define CV_MSG_PRAGMA(_msg)
#endif
#define CV_WARNING(x) CV_MSG_PRAGMA("Warning: " #x)
//CV_WARNING("This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module")
#include "opencv2/core/core_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/photo/photo_c.h"
#include "opencv2/video/tracking_c.h"
#include "opencv2/objdetect/objdetect_c.h"
#if !defined(CV_IMPL)
#define CV_IMPL extern "C"
#endif //CV_IMPL
#endif // OPENCV_OLD_CV_H


@@ -0,0 +1,60 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_CV_HPP
#define OPENCV_OLD_CV_HPP
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "cv.h"
#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/photo.hpp"
#include "opencv2/video.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/objdetect.hpp"
#endif


@@ -0,0 +1,57 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_AUX_H
#define OPENCV_OLD_AUX_H
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "opencv2/core/core_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/photo/photo_c.h"
#include "opencv2/video/tracking_c.h"
#include "opencv2/objdetect/objdetect_c.h"
#endif
/* End of file. */


@@ -0,0 +1,52 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_AUX_HPP
#define OPENCV_OLD_AUX_HPP
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "cvaux.h"
#include "opencv2/core/utility.hpp"
#endif


@@ -0,0 +1,46 @@
///////////////////////////////////////////////////////////////////////////////
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to
// this license. If you do not agree to this license, do not download,
// install, copy or use the software.
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2008, Google, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation or contributors may not be used to endorse
// or promote products derived from this software without specific
// prior written permission.
//
// This software is provided by the copyright holders and contributors "as is"
// and any express or implied warranties, including, but not limited to, the
// implied warranties of merchantability and fitness for a particular purpose
// are disclaimed. In no event shall the Intel Corporation or contributors be
// liable for any direct, indirect, incidental, special, exemplary, or
// consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifndef OPENCV_OLD_WIMAGE_HPP
#define OPENCV_OLD_WIMAGE_HPP
#include "opencv2/core/wimage.hpp"
#endif


@@ -0,0 +1,52 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_CXCORE_H
#define OPENCV_OLD_CXCORE_H
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "opencv2/core/core_c.h"
#endif


@@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_CXCORE_HPP
#define OPENCV_OLD_CXCORE_HPP
//#if defined(__GNUC__)
//#warning "This is a deprecated opencv header provided for compatibility. Please include a header from a corresponding opencv module"
//#endif
#include "cxcore.h"
#include "opencv2/core.hpp"
#endif


@@ -0,0 +1,48 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_EIGEN_HPP
#define OPENCV_OLD_EIGEN_HPP
#include "opencv2/core/eigen.hpp"
#endif


@@ -0,0 +1,8 @@
#ifndef OPENCV_OLD_CXMISC_H
#define OPENCV_OLD_CXMISC_H
#ifdef __cplusplus
# include "opencv2/core/utility.hpp"
#endif
#endif


@@ -0,0 +1,48 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_HIGHGUI_H
#define OPENCV_OLD_HIGHGUI_H
#include "opencv2/core/core_c.h"
#include "opencv2/highgui/highgui_c.h"
#endif

3rdparty/libopencv/include/opencv/ml.h (vendored new file, 47 lines)

@@ -0,0 +1,47 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OLD_ML_H
#define OPENCV_OLD_ML_H
#include "opencv2/core/core_c.h"
#include "opencv2/ml.hpp"
#endif

File diff suppressed because it is too large.


@@ -0,0 +1,48 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef __OPENCV_BUILD
#error this is a compatibility header which should not be used inside the OpenCV library
#endif
#include "opencv2/calib3d.hpp"


@@ -0,0 +1,427 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CALIB3D_C_H
#define OPENCV_CALIB3D_C_H
#include "opencv2/core/core_c.h"
#ifdef __cplusplus
extern "C" {
#endif
/** @addtogroup calib3d_c
@{
*/
/****************************************************************************************\
* Camera Calibration, Pose Estimation and Stereo *
\****************************************************************************************/
typedef struct CvPOSITObject CvPOSITObject;
/* Allocates and initializes CvPOSITObject structure before doing cvPOSIT */
CVAPI(CvPOSITObject*) cvCreatePOSITObject( CvPoint3D32f* points, int point_count );
/* Runs POSIT (POSe from ITeration) algorithm for determining 3d position of
an object given its model and projection in a weak-perspective case */
CVAPI(void) cvPOSIT( CvPOSITObject* posit_object, CvPoint2D32f* image_points,
double focal_length, CvTermCriteria criteria,
float* rotation_matrix, float* translation_vector);
/* Releases CvPOSITObject structure */
CVAPI(void) cvReleasePOSITObject( CvPOSITObject** posit_object );
/* updates the number of RANSAC iterations */
CVAPI(int) cvRANSACUpdateNumIters( double p, double err_prob,
int model_points, int max_iters );
CVAPI(void) cvConvertPointsHomogeneous( const CvMat* src, CvMat* dst );
/* Calculates fundamental matrix given a set of corresponding points */
#define CV_FM_7POINT 1
#define CV_FM_8POINT 2
#define CV_LMEDS 4
#define CV_RANSAC 8
#define CV_FM_LMEDS_ONLY CV_LMEDS
#define CV_FM_RANSAC_ONLY CV_RANSAC
#define CV_FM_LMEDS CV_LMEDS
#define CV_FM_RANSAC CV_RANSAC
enum
{
CV_ITERATIVE = 0,
CV_EPNP = 1, // F.Moreno-Noguer, V.Lepetit and P.Fua "EPnP: Efficient Perspective-n-Point Camera Pose Estimation"
CV_P3P = 2, // X.S. Gao, X.-R. Hou, J. Tang, H.-F. Chang; "Complete Solution Classification for the Perspective-Three-Point Problem"
CV_DLS = 3 // Joel A. Hesch and Stergios I. Roumeliotis. "A Direct Least-Squares (DLS) Method for PnP"
};
CVAPI(int) cvFindFundamentalMat( const CvMat* points1, const CvMat* points2,
CvMat* fundamental_matrix,
int method CV_DEFAULT(CV_FM_RANSAC),
double param1 CV_DEFAULT(3.), double param2 CV_DEFAULT(0.99),
CvMat* status CV_DEFAULT(NULL) );
/* For each input point on one of images
computes parameters of the corresponding
epipolar line on the other image */
CVAPI(void) cvComputeCorrespondEpilines( const CvMat* points,
int which_image,
const CvMat* fundamental_matrix,
CvMat* correspondent_lines );
/* Triangulation functions */
CVAPI(void) cvTriangulatePoints(CvMat* projMatr1, CvMat* projMatr2,
CvMat* projPoints1, CvMat* projPoints2,
CvMat* points4D);
CVAPI(void) cvCorrectMatches(CvMat* F, CvMat* points1, CvMat* points2,
CvMat* new_points1, CvMat* new_points2);
/* Computes the optimal new camera matrix according to the free scaling parameter alpha:
alpha=0 - only valid pixels will be retained in the undistorted image
alpha=1 - all the source image pixels will be retained in the undistorted image
*/
CVAPI(void) cvGetOptimalNewCameraMatrix( const CvMat* camera_matrix,
const CvMat* dist_coeffs,
CvSize image_size, double alpha,
CvMat* new_camera_matrix,
CvSize new_imag_size CV_DEFAULT(cvSize(0,0)),
CvRect* valid_pixel_ROI CV_DEFAULT(0),
int center_principal_point CV_DEFAULT(0));
/* Converts rotation vector to rotation matrix or vice versa */
CVAPI(int) cvRodrigues2( const CvMat* src, CvMat* dst,
CvMat* jacobian CV_DEFAULT(0) );
/* Finds perspective transformation between the object plane and image (view) plane */
CVAPI(int) cvFindHomography( const CvMat* src_points,
const CvMat* dst_points,
CvMat* homography,
int method CV_DEFAULT(0),
double ransacReprojThreshold CV_DEFAULT(3),
CvMat* mask CV_DEFAULT(0),
int maxIters CV_DEFAULT(2000),
double confidence CV_DEFAULT(0.995));
/* Computes RQ decomposition for 3x3 matrices */
CVAPI(void) cvRQDecomp3x3( const CvMat *matrixM, CvMat *matrixR, CvMat *matrixQ,
CvMat *matrixQx CV_DEFAULT(NULL),
CvMat *matrixQy CV_DEFAULT(NULL),
CvMat *matrixQz CV_DEFAULT(NULL),
CvPoint3D64f *eulerAngles CV_DEFAULT(NULL));
/* Computes projection matrix decomposition */
CVAPI(void) cvDecomposeProjectionMatrix( const CvMat *projMatr, CvMat *calibMatr,
CvMat *rotMatr, CvMat *posVect,
CvMat *rotMatrX CV_DEFAULT(NULL),
CvMat *rotMatrY CV_DEFAULT(NULL),
CvMat *rotMatrZ CV_DEFAULT(NULL),
CvPoint3D64f *eulerAngles CV_DEFAULT(NULL));
/* Computes d(AB)/dA and d(AB)/dB */
CVAPI(void) cvCalcMatMulDeriv( const CvMat* A, const CvMat* B, CvMat* dABdA, CvMat* dABdB );
/* Computes r3 = rodrigues(rodrigues(r2)*rodrigues(r1)),
t3 = rodrigues(r2)*t1 + t2 and the respective derivatives */
CVAPI(void) cvComposeRT( const CvMat* _rvec1, const CvMat* _tvec1,
const CvMat* _rvec2, const CvMat* _tvec2,
CvMat* _rvec3, CvMat* _tvec3,
CvMat* dr3dr1 CV_DEFAULT(0), CvMat* dr3dt1 CV_DEFAULT(0),
CvMat* dr3dr2 CV_DEFAULT(0), CvMat* dr3dt2 CV_DEFAULT(0),
CvMat* dt3dr1 CV_DEFAULT(0), CvMat* dt3dt1 CV_DEFAULT(0),
CvMat* dt3dr2 CV_DEFAULT(0), CvMat* dt3dt2 CV_DEFAULT(0) );
/* Projects object points to the view plane using
the specified extrinsic and intrinsic camera parameters */
CVAPI(void) cvProjectPoints2( const CvMat* object_points, const CvMat* rotation_vector,
const CvMat* translation_vector, const CvMat* camera_matrix,
const CvMat* distortion_coeffs, CvMat* image_points,
CvMat* dpdrot CV_DEFAULT(NULL), CvMat* dpdt CV_DEFAULT(NULL),
CvMat* dpdf CV_DEFAULT(NULL), CvMat* dpdc CV_DEFAULT(NULL),
CvMat* dpddist CV_DEFAULT(NULL),
double aspect_ratio CV_DEFAULT(0));
/* Finds extrinsic camera parameters from
a few known corresponding point pairs and intrinsic parameters */
CVAPI(void) cvFindExtrinsicCameraParams2( const CvMat* object_points,
const CvMat* image_points,
const CvMat* camera_matrix,
const CvMat* distortion_coeffs,
CvMat* rotation_vector,
CvMat* translation_vector,
int use_extrinsic_guess CV_DEFAULT(0) );
/* Computes initial estimate of the intrinsic camera parameters
in case of planar calibration target (e.g. chessboard) */
CVAPI(void) cvInitIntrinsicParams2D( const CvMat* object_points,
const CvMat* image_points,
const CvMat* npoints, CvSize image_size,
CvMat* camera_matrix,
double aspect_ratio CV_DEFAULT(1.) );
#define CV_CALIB_CB_ADAPTIVE_THRESH 1
#define CV_CALIB_CB_NORMALIZE_IMAGE 2
#define CV_CALIB_CB_FILTER_QUADS 4
#define CV_CALIB_CB_FAST_CHECK 8
// Performs a fast check if a chessboard is in the input image. This is a workaround to
// a problem of cvFindChessboardCorners being slow on images with no chessboard
// - src: input image
// - size: chessboard size
// Returns 1 if a chessboard can be in this image and findChessboardCorners should be called,
// 0 if there is no chessboard, -1 in case of error
CVAPI(int) cvCheckChessboard(IplImage* src, CvSize size);
/* Detects corners on a chessboard calibration pattern */
CVAPI(int) cvFindChessboardCorners( const void* image, CvSize pattern_size,
CvPoint2D32f* corners,
int* corner_count CV_DEFAULT(NULL),
int flags CV_DEFAULT(CV_CALIB_CB_ADAPTIVE_THRESH+CV_CALIB_CB_NORMALIZE_IMAGE) );
/* Draws individual chessboard corners or the whole chessboard detected */
CVAPI(void) cvDrawChessboardCorners( CvArr* image, CvSize pattern_size,
CvPoint2D32f* corners,
int count, int pattern_was_found );
#define CV_CALIB_USE_INTRINSIC_GUESS 1
#define CV_CALIB_FIX_ASPECT_RATIO 2
#define CV_CALIB_FIX_PRINCIPAL_POINT 4
#define CV_CALIB_ZERO_TANGENT_DIST 8
#define CV_CALIB_FIX_FOCAL_LENGTH 16
#define CV_CALIB_FIX_K1 32
#define CV_CALIB_FIX_K2 64
#define CV_CALIB_FIX_K3 128
#define CV_CALIB_FIX_K4 2048
#define CV_CALIB_FIX_K5 4096
#define CV_CALIB_FIX_K6 8192
#define CV_CALIB_RATIONAL_MODEL 16384
#define CV_CALIB_THIN_PRISM_MODEL 32768
#define CV_CALIB_FIX_S1_S2_S3_S4 65536
#define CV_CALIB_TILTED_MODEL 262144
#define CV_CALIB_FIX_TAUX_TAUY 524288
#define CV_CALIB_FIX_TANGENT_DIST 2097152
#define CV_CALIB_NINTRINSIC 18
/* Finds intrinsic and extrinsic camera parameters
from a few views of known calibration pattern */
CVAPI(double) cvCalibrateCamera2( const CvMat* object_points,
const CvMat* image_points,
const CvMat* point_counts,
CvSize image_size,
CvMat* camera_matrix,
CvMat* distortion_coeffs,
CvMat* rotation_vectors CV_DEFAULT(NULL),
CvMat* translation_vectors CV_DEFAULT(NULL),
int flags CV_DEFAULT(0),
CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria(
CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,DBL_EPSILON)) );
/* Computes various useful characteristics of the camera from the data computed by
cvCalibrateCamera2 */
CVAPI(void) cvCalibrationMatrixValues( const CvMat *camera_matrix,
CvSize image_size,
double aperture_width CV_DEFAULT(0),
double aperture_height CV_DEFAULT(0),
double *fovx CV_DEFAULT(NULL),
double *fovy CV_DEFAULT(NULL),
double *focal_length CV_DEFAULT(NULL),
CvPoint2D64f *principal_point CV_DEFAULT(NULL),
double *pixel_aspect_ratio CV_DEFAULT(NULL));
#define CV_CALIB_FIX_INTRINSIC 256
#define CV_CALIB_SAME_FOCAL_LENGTH 512
/* Computes the transformation from one camera coordinate system to another one
from a few correspondent views of the same calibration target. Optionally, calibrates
both cameras */
CVAPI(double) cvStereoCalibrate( const CvMat* object_points, const CvMat* image_points1,
const CvMat* image_points2, const CvMat* npoints,
CvMat* camera_matrix1, CvMat* dist_coeffs1,
CvMat* camera_matrix2, CvMat* dist_coeffs2,
CvSize image_size, CvMat* R, CvMat* T,
CvMat* E CV_DEFAULT(0), CvMat* F CV_DEFAULT(0),
int flags CV_DEFAULT(CV_CALIB_FIX_INTRINSIC),
CvTermCriteria term_crit CV_DEFAULT(cvTermCriteria(
CV_TERMCRIT_ITER+CV_TERMCRIT_EPS,30,1e-6)) );
#define CV_CALIB_ZERO_DISPARITY 1024
/* Computes 3D rotations (+ optional shift) for each camera coordinate system to make both
views parallel (=> to make all the epipolar lines horizontal or vertical) */
CVAPI(void) cvStereoRectify( const CvMat* camera_matrix1, const CvMat* camera_matrix2,
const CvMat* dist_coeffs1, const CvMat* dist_coeffs2,
CvSize image_size, const CvMat* R, const CvMat* T,
CvMat* R1, CvMat* R2, CvMat* P1, CvMat* P2,
CvMat* Q CV_DEFAULT(0),
int flags CV_DEFAULT(CV_CALIB_ZERO_DISPARITY),
double alpha CV_DEFAULT(-1),
CvSize new_image_size CV_DEFAULT(cvSize(0,0)),
CvRect* valid_pix_ROI1 CV_DEFAULT(0),
CvRect* valid_pix_ROI2 CV_DEFAULT(0));
/* Computes rectification transformations for uncalibrated pair of images using a set
of point correspondences */
CVAPI(int) cvStereoRectifyUncalibrated( const CvMat* points1, const CvMat* points2,
const CvMat* F, CvSize img_size,
CvMat* H1, CvMat* H2,
double threshold CV_DEFAULT(5));
/* stereo correspondence parameters and functions */
#define CV_STEREO_BM_NORMALIZED_RESPONSE 0
#define CV_STEREO_BM_XSOBEL 1
/* Block matching algorithm structure */
typedef struct CvStereoBMState
{
// pre-filtering (normalization of input images)
int preFilterType; // =CV_STEREO_BM_NORMALIZED_RESPONSE now
int preFilterSize; // averaging window size: ~5x5..21x21
int preFilterCap; // the output of pre-filtering is clipped by [-preFilterCap,preFilterCap]
// correspondence using Sum of Absolute Difference (SAD)
int SADWindowSize; // ~5x5..21x21
int minDisparity; // minimum disparity (can be negative)
int numberOfDisparities; // maximum disparity - minimum disparity (> 0)
// post-filtering
int textureThreshold; // the disparity is only computed for pixels
// with textured enough neighborhood
int uniquenessRatio; // accept the computed disparity d* only if
// SAD(d) >= SAD(d*)*(1 + uniquenessRatio/100.)
// for any d != d*+/-1 within the search range.
int speckleWindowSize; // disparity variation window
int speckleRange; // acceptable range of variation in window
int trySmallerWindows; // if 1, the results may be more accurate,
// at the expense of slower processing
CvRect roi1, roi2;
int disp12MaxDiff;
// temporary buffers
CvMat* preFilteredImg0;
CvMat* preFilteredImg1;
CvMat* slidingSumBuf;
CvMat* cost;
CvMat* disp;
} CvStereoBMState;
#define CV_STEREO_BM_BASIC 0
#define CV_STEREO_BM_FISH_EYE 1
#define CV_STEREO_BM_NARROW 2
CVAPI(CvStereoBMState*) cvCreateStereoBMState(int preset CV_DEFAULT(CV_STEREO_BM_BASIC),
int numberOfDisparities CV_DEFAULT(0));
CVAPI(void) cvReleaseStereoBMState( CvStereoBMState** state );
CVAPI(void) cvFindStereoCorrespondenceBM( const CvArr* left, const CvArr* right,
CvArr* disparity, CvStereoBMState* state );
CVAPI(CvRect) cvGetValidDisparityROI( CvRect roi1, CvRect roi2, int minDisparity,
int numberOfDisparities, int SADWindowSize );
CVAPI(void) cvValidateDisparity( CvArr* disparity, const CvArr* cost,
int minDisparity, int numberOfDisparities,
int disp12MaxDiff CV_DEFAULT(1) );
/* Reprojects the computed disparity image to the 3D space using the specified 4x4 matrix */
CVAPI(void) cvReprojectImageTo3D( const CvArr* disparityImage,
CvArr* _3dImage, const CvMat* Q,
int handleMissingValues CV_DEFAULT(0) );
/** @} calib3d_c */
#ifdef __cplusplus
} // extern "C"
//////////////////////////////////////////////////////////////////////////////////////////
class CV_EXPORTS CvLevMarq
{
public:
CvLevMarq();
CvLevMarq( int nparams, int nerrs, CvTermCriteria criteria=
cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
bool completeSymmFlag=false );
~CvLevMarq();
void init( int nparams, int nerrs, CvTermCriteria criteria=
cvTermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER,30,DBL_EPSILON),
bool completeSymmFlag=false );
bool update( const CvMat*& param, CvMat*& J, CvMat*& err );
bool updateAlt( const CvMat*& param, CvMat*& JtJ, CvMat*& JtErr, double*& errNorm );
void clear();
void step();
enum { DONE=0, STARTED=1, CALC_J=2, CHECK_ERR=3 };
cv::Ptr<CvMat> mask;
cv::Ptr<CvMat> prevParam;
cv::Ptr<CvMat> param;
cv::Ptr<CvMat> J;
cv::Ptr<CvMat> err;
cv::Ptr<CvMat> JtJ;
cv::Ptr<CvMat> JtJN;
cv::Ptr<CvMat> JtErr;
cv::Ptr<CvMat> JtJV;
cv::Ptr<CvMat> JtJW;
double prevErrNorm, errNorm;
int lambdaLg10;
CvTermCriteria criteria;
int state;
int iters;
bool completeSymmFlag;
int solveMethod;
};
#endif
#endif /* OPENCV_CALIB3D_C_H */
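As a quick illustration of the legacy C API declared above, a minimal sketch (compiled as C++ so the CV_DEFAULT argument defaults apply) that estimates a homography from four hand-picked correspondences; the point values are arbitrary:

#include "opencv2/calib3d/calib3d_c.h"
#include <cstdio>
int main() {
    // Four correspondences: the unit square translated by (2, 1).
    float src[] = {0,0, 1,0, 1,1, 0,1};
    float dst[] = {2,1, 3,1, 3,2, 2,2};
    CvMat srcMat = cvMat(4, 2, CV_32FC1, src);
    CvMat dstMat = cvMat(4, 2, CV_32FC1, dst);
    double h[9];
    CvMat H = cvMat(3, 3, CV_64FC1, h);
    // The default method 0 is a plain least-squares fit; CV_RANSAC / CV_LMEDS are the robust variants.
    if (cvFindHomography(&srcMat, &dstMat, &H))
        std::printf("tx = %.2f, ty = %.2f\n", h[2], h[5]); // expect roughly 2 and 1
    return 0;
}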

File diff suppressed because it is too large.


@@ -0,0 +1,678 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_AFFINE3_HPP
#define OPENCV_CORE_AFFINE3_HPP
#ifdef __cplusplus
#include <opencv2/core.hpp>
namespace cv
{
//! @addtogroup core
//! @{
/** @brief Affine transform
*
* It represents a 4x4 homogeneous transformation matrix \f$T\f$
*
* \f[T =
* \begin{bmatrix}
* R & t\\
* 0 & 1\\
* \end{bmatrix}
* \f]
*
* where \f$R\f$ is a 3x3 rotation matrix and \f$t\f$ is a 3x1 translation vector.
*
* You can specify \f$R\f$ either by a 3x3 rotation matrix or by a 3x1 rotation vector,
* which is converted to a 3x3 rotation matrix by the Rodrigues formula.
*
* To construct a matrix \f$T\f$ representing first rotation around the axis \f$r\f$ with rotation
* angle \f$|r|\f$ in radian (right hand rule) and then translation by the vector \f$t\f$, you can use
*
* @code
* cv::Vec3f r, t;
* cv::Affine3f T(r, t);
* @endcode
*
* If you already have the rotation matrix \f$R\f$, then you can use
*
* @code
* cv::Matx33f R;
* cv::Affine3f T(R, t);
* @endcode
*
* To extract the rotation matrix \f$R\f$ from \f$T\f$, use
*
* @code
* cv::Matx33f R = T.rotation();
* @endcode
*
* To extract the translation vector \f$t\f$ from \f$T\f$, use
*
* @code
* cv::Vec3f t = T.translation();
* @endcode
*
* To extract the rotation vector \f$r\f$ from \f$T\f$, use
*
* @code
* cv::Vec3f r = T.rvec();
* @endcode
*
* Note that since the mapping from rotation vectors to rotation matrices
* is many to one, the returned rotation vector is not necessarily the one
* you used before to set the matrix.
*
* If you have two transformations \f$T = T_1 * T_2\f$, use
*
* @code
* cv::Affine3f T, T1, T2;
* T = T2.concatenate(T1);
* @endcode
*
* To get the inverse transform of \f$T\f$, use
*
* @code
* cv::Affine3f T, T_inv;
* T_inv = T.inv();
* @endcode
*
*/
template<typename T>
class Affine3
{
public:
typedef T float_type;
typedef Matx<float_type, 3, 3> Mat3;
typedef Matx<float_type, 4, 4> Mat4;
typedef Vec<float_type, 3> Vec3;
//! Default constructor. It represents a 4x4 identity matrix.
Affine3();
//! Augmented affine matrix
Affine3(const Mat4& affine);
/**
* The resulting 4x4 matrix is
*
* \f[
* \begin{bmatrix}
* R & t\\
* 0 & 1\\
* \end{bmatrix}
* \f]
*
* @param R 3x3 rotation matrix.
* @param t 3x1 translation vector.
*/
Affine3(const Mat3& R, const Vec3& t = Vec3::all(0));
/**
* Rodrigues vector.
*
* The last row of the current matrix is set to [0,0,0,1].
*
* @param rvec 3x1 rotation vector. Its direction indicates the rotation axis and its length
* indicates the rotation angle in radian (using right hand rule).
* @param t 3x1 translation vector.
*/
Affine3(const Vec3& rvec, const Vec3& t = Vec3::all(0));
/**
* Combines all constructors above. Supports 4x4, 3x4, 3x3, 1x3, 3x1 sizes of data matrix.
*
* The last row of the current matrix is set to [0,0,0,1] when data is not 4x4.
*
* @param data 1-channel matrix.
* when it is 4x4, it is copied to the current matrix and t is not used.
* When it is 3x4, it is copied to the upper part 3x4 of the current matrix and t is not used.
* When it is 3x3, it is copied to the upper left 3x3 part of the current matrix.
* When it is 3x1 or 1x3, it is treated as a rotation vector and the Rodrigues formula is used
* to compute a 3x3 rotation matrix.
* @param t 3x1 translation vector. It is used only when data is neither 4x4 nor 3x4.
*/
explicit Affine3(const Mat& data, const Vec3& t = Vec3::all(0));
//! From 16-element array
explicit Affine3(const float_type* vals);
//! Create a 4x4 identity transform
static Affine3 Identity();
/**
* Rotation matrix.
*
* Copy the rotation matrix to the upper left 3x3 part of the current matrix.
* The remaining elements of the current matrix are not changed.
*
* @param R 3x3 rotation matrix.
*
*/
void rotation(const Mat3& R);
/**
* Rodrigues vector.
*
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
*
* @param rvec 3x1 rotation vector. The direction indicates the rotation axis and
* its length indicates the rotation angle in radian (using the right thumb convention).
*/
void rotation(const Vec3& rvec);
/**
* Combines rotation methods above. Supports 3x3, 1x3, 3x1 sizes of data matrix.
*
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
*
* @param data 1-channel matrix.
* When it is a 3x3 matrix, it sets the upper left 3x3 part of the current matrix.
* When it is a 1x3 or 3x1 matrix, it is used as a rotation vector. The Rodrigues formula
* is used to compute the rotation matrix and sets the upper left 3x3 part of the current matrix.
*/
void rotation(const Mat& data);
/**
* Copy the 3x3 matrix L to the upper left part of the current matrix
*
* It sets the upper left 3x3 part of the matrix. The remaining part is unaffected.
*
* @param L 3x3 matrix.
*/
void linear(const Mat3& L);
/**
* Copy t to the first three elements of the last column of the current matrix
*
* It sets the upper right 3x1 part of the matrix. The remaining part is unaffected.
*
* @param t 3x1 translation vector.
*/
void translation(const Vec3& t);
//! @return the upper left 3x3 part
Mat3 rotation() const;
//! @return the upper left 3x3 part
Mat3 linear() const;
//! @return the upper right 3x1 part
Vec3 translation() const;
//! Rodrigues vector.
//! @return a vector representing the upper left 3x3 rotation matrix of the current matrix.
//! @warning Since the mapping between rotation vectors and rotation matrices is many to one,
//! this function returns only one rotation vector that represents the current rotation matrix,
//! which is not necessarily the same one set by `rotation(const Vec3& rvec)`.
Vec3 rvec() const;
//! @return the inverse of the current matrix.
Affine3 inv(int method = cv::DECOMP_SVD) const;
//! a.rotate(R) is equivalent to Affine(R, 0) * a;
Affine3 rotate(const Mat3& R) const;
//! a.rotate(rvec) is equivalent to Affine(rvec, 0) * a;
Affine3 rotate(const Vec3& rvec) const;
//! a.translate(t) is equivalent to Affine(E, t) * a, where E is an identity matrix
Affine3 translate(const Vec3& t) const;
//! a.concatenate(affine) is equivalent to affine * a;
Affine3 concatenate(const Affine3& affine) const;
template <typename Y> operator Affine3<Y>() const;
template <typename Y> Affine3<Y> cast() const;
Mat4 matrix;
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine);
Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine);
operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const;
operator Eigen::Transform<T, 3, Eigen::Affine>() const;
#endif
};
template<typename T> static
Affine3<T> operator*(const Affine3<T>& affine1, const Affine3<T>& affine2);
//! V is a 3-element vector with member fields x, y and z
template<typename T, typename V> static
V operator*(const Affine3<T>& affine, const V& vector);
typedef Affine3<float> Affine3f;
typedef Affine3<double> Affine3d;
static Vec3f operator*(const Affine3f& affine, const Vec3f& vector);
static Vec3d operator*(const Affine3d& affine, const Vec3d& vector);
template<typename _Tp> class DataType< Affine3<_Tp> >
{
public:
typedef Affine3<_Tp> value_type;
typedef Affine3<typename DataType<_Tp>::work_type> work_type;
typedef _Tp channel_type;
enum { generic_type = 0,
channels = 16,
fmt = traits::SafeFmt<channel_type>::fmt + ((channels - 1) << 8)
#ifdef OPENCV_TRAITS_ENABLE_DEPRECATED
,depth = DataType<channel_type>::depth
,type = CV_MAKETYPE(depth, channels)
#endif
};
typedef Vec<channel_type, channels> vec_type;
};
namespace traits {
template<typename _Tp>
struct Depth< Affine3<_Tp> > { enum { value = Depth<_Tp>::value }; };
template<typename _Tp>
struct Type< Affine3<_Tp> > { enum { value = CV_MAKETYPE(Depth<_Tp>::value, 16) }; };
} // namespace
//! @} core
}
//! @cond IGNORED
///////////////////////////////////////////////////////////////////////////////////
// Implementation
template<typename T> inline
cv::Affine3<T>::Affine3()
: matrix(Mat4::eye())
{}
template<typename T> inline
cv::Affine3<T>::Affine3(const Mat4& affine)
: matrix(affine)
{}
template<typename T> inline
cv::Affine3<T>::Affine3(const Mat3& R, const Vec3& t)
{
rotation(R);
translation(t);
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
matrix.val[15] = 1;
}
template<typename T> inline
cv::Affine3<T>::Affine3(const Vec3& _rvec, const Vec3& t)
{
rotation(_rvec);
translation(t);
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
matrix.val[15] = 1;
}
template<typename T> inline
cv::Affine3<T>::Affine3(const cv::Mat& data, const Vec3& t)
{
CV_Assert(data.type() == cv::traits::Type<T>::value);
CV_Assert(data.channels() == 1);
if (data.cols == 4 && data.rows == 4)
{
data.copyTo(matrix);
return;
}
else if (data.cols == 4 && data.rows == 3)
{
rotation(data(Rect(0, 0, 3, 3)));
translation(data(Rect(3, 0, 1, 3)));
}
else
{
rotation(data);
translation(t);
}
matrix.val[12] = matrix.val[13] = matrix.val[14] = 0;
matrix.val[15] = 1;
}
template<typename T> inline
cv::Affine3<T>::Affine3(const float_type* vals) : matrix(vals)
{}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::Identity()
{
return Affine3<T>(cv::Affine3<T>::Mat4::eye());
}
template<typename T> inline
void cv::Affine3<T>::rotation(const Mat3& R)
{
linear(R);
}
template<typename T> inline
void cv::Affine3<T>::rotation(const Vec3& _rvec)
{
double theta = norm(_rvec);
if (theta < DBL_EPSILON)
rotation(Mat3::eye());
else
{
double c = std::cos(theta);
double s = std::sin(theta);
double c1 = 1. - c;
double itheta = (theta != 0) ? 1./theta : 0.;
Point3_<T> r = _rvec*itheta;
Mat3 rrt( r.x*r.x, r.x*r.y, r.x*r.z, r.x*r.y, r.y*r.y, r.y*r.z, r.x*r.z, r.y*r.z, r.z*r.z );
Mat3 r_x( 0, -r.z, r.y, r.z, 0, -r.x, -r.y, r.x, 0 );
// R = cos(theta)*I + (1 - cos(theta))*r*rT + sin(theta)*[r_x]
// where [r_x] is [0 -rz ry; rz 0 -rx; -ry rx 0]
Mat3 R = c*Mat3::eye() + c1*rrt + s*r_x;
rotation(R);
}
}
// Combines the rotation methods above. Supports 3x3, 1x3 and 3x1 sizes of the data matrix.
template<typename T> inline
void cv::Affine3<T>::rotation(const cv::Mat& data)
{
CV_Assert(data.type() == cv::traits::Type<T>::value);
CV_Assert(data.channels() == 1);
if (data.cols == 3 && data.rows == 3)
{
Mat3 R;
data.copyTo(R);
rotation(R);
}
else if ((data.cols == 3 && data.rows == 1) || (data.cols == 1 && data.rows == 3))
{
Vec3 _rvec;
data.reshape(1, 3).copyTo(_rvec);
rotation(_rvec);
}
else
CV_Assert(!"Input matrix can only be 3x3, 1x3 or 3x1");
}
template<typename T> inline
void cv::Affine3<T>::linear(const Mat3& L)
{
matrix.val[0] = L.val[0]; matrix.val[1] = L.val[1]; matrix.val[ 2] = L.val[2];
matrix.val[4] = L.val[3]; matrix.val[5] = L.val[4]; matrix.val[ 6] = L.val[5];
matrix.val[8] = L.val[6]; matrix.val[9] = L.val[7]; matrix.val[10] = L.val[8];
}
template<typename T> inline
void cv::Affine3<T>::translation(const Vec3& t)
{
matrix.val[3] = t[0]; matrix.val[7] = t[1]; matrix.val[11] = t[2];
}
template<typename T> inline
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::rotation() const
{
return linear();
}
template<typename T> inline
typename cv::Affine3<T>::Mat3 cv::Affine3<T>::linear() const
{
typename cv::Affine3<T>::Mat3 R;
R.val[0] = matrix.val[0]; R.val[1] = matrix.val[1]; R.val[2] = matrix.val[ 2];
R.val[3] = matrix.val[4]; R.val[4] = matrix.val[5]; R.val[5] = matrix.val[ 6];
R.val[6] = matrix.val[8]; R.val[7] = matrix.val[9]; R.val[8] = matrix.val[10];
return R;
}
template<typename T> inline
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::translation() const
{
return Vec3(matrix.val[3], matrix.val[7], matrix.val[11]);
}
template<typename T> inline
typename cv::Affine3<T>::Vec3 cv::Affine3<T>::rvec() const
{
cv::Vec3d w;
cv::Matx33d u, vt, R = rotation();
cv::SVD::compute(R, w, u, vt, cv::SVD::FULL_UV + cv::SVD::MODIFY_A);
R = u * vt;
double rx = R.val[7] - R.val[5];
double ry = R.val[2] - R.val[6];
double rz = R.val[3] - R.val[1];
double s = std::sqrt((rx*rx + ry*ry + rz*rz)*0.25);
double c = (R.val[0] + R.val[4] + R.val[8] - 1) * 0.5;
c = c > 1.0 ? 1.0 : c < -1.0 ? -1.0 : c;
double theta = acos(c);
if( s < 1e-5 )
{
if( c > 0 )
rx = ry = rz = 0;
else
{
double t;
t = (R.val[0] + 1) * 0.5;
rx = std::sqrt(std::max(t, 0.0));
t = (R.val[4] + 1) * 0.5;
ry = std::sqrt(std::max(t, 0.0)) * (R.val[1] < 0 ? -1.0 : 1.0);
t = (R.val[8] + 1) * 0.5;
rz = std::sqrt(std::max(t, 0.0)) * (R.val[2] < 0 ? -1.0 : 1.0);
if( fabs(rx) < fabs(ry) && fabs(rx) < fabs(rz) && (R.val[5] > 0) != (ry*rz > 0) )
rz = -rz;
theta /= std::sqrt(rx*rx + ry*ry + rz*rz);
rx *= theta;
ry *= theta;
rz *= theta;
}
}
else
{
double vth = 1/(2*s);
vth *= theta;
rx *= vth; ry *= vth; rz *= vth;
}
return cv::Vec3d(rx, ry, rz);
}
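// Illustrative round trip (editor's sketch): a rotation vector encodes axis * angle,
// and rvec() recovers one such vector from the upper left 3x3 block. Because the
// mapping is many-to-one, only rotations with angle in (0, pi) round-trip exactly.
//
//   cv::Vec3d r_in(0.1, -0.2, 0.3);           // |r_in| < pi
//   cv::Affine3d a(r_in, cv::Vec3d::all(0));
//   cv::Vec3d r_out = a.rvec();               // r_out ~ r_in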
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::inv(int method) const
{
return matrix.inv(method);
}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::rotate(const Mat3& R) const
{
Mat3 Lc = linear();
Vec3 tc = translation();
Mat4 result;
result.val[12] = result.val[13] = result.val[14] = 0;
result.val[15] = 1;
for(int j = 0; j < 3; ++j)
{
for(int i = 0; i < 3; ++i)
{
float_type value = 0;
for(int k = 0; k < 3; ++k)
value += R(j, k) * Lc(k, i);
result(j, i) = value;
}
result(j, 3) = R.row(j).dot(tc.t());
}
return result;
}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::rotate(const Vec3& _rvec) const
{
    return rotate(Affine3<T>(_rvec).rotation()); // build the rotation at T's precision (Affine3f here would truncate doubles)
}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::translate(const Vec3& t) const
{
Mat4 m = matrix;
m.val[ 3] += t[0];
m.val[ 7] += t[1];
m.val[11] += t[2];
return m;
}
template<typename T> inline
cv::Affine3<T> cv::Affine3<T>::concatenate(const Affine3<T>& affine) const
{
return (*this).rotate(affine.rotation()).translate(affine.translation());
}
template<typename T> template <typename Y> inline
cv::Affine3<T>::operator Affine3<Y>() const
{
return Affine3<Y>(matrix);
}
template<typename T> template <typename Y> inline
cv::Affine3<Y> cv::Affine3<T>::cast() const
{
return Affine3<Y>(matrix);
}
template<typename T> inline
cv::Affine3<T> cv::operator*(const cv::Affine3<T>& affine1, const cv::Affine3<T>& affine2)
{
return affine2.concatenate(affine1);
}
template<typename T, typename V> inline
V cv::operator*(const cv::Affine3<T>& affine, const V& v)
{
const typename Affine3<T>::Mat4& m = affine.matrix;
V r;
r.x = m.val[0] * v.x + m.val[1] * v.y + m.val[ 2] * v.z + m.val[ 3];
r.y = m.val[4] * v.x + m.val[5] * v.y + m.val[ 6] * v.z + m.val[ 7];
r.z = m.val[8] * v.x + m.val[9] * v.y + m.val[10] * v.z + m.val[11];
return r;
}
static inline
cv::Vec3f cv::operator*(const cv::Affine3f& affine, const cv::Vec3f& v)
{
const cv::Matx44f& m = affine.matrix;
cv::Vec3f r;
r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
return r;
}
static inline
cv::Vec3d cv::operator*(const cv::Affine3d& affine, const cv::Vec3d& v)
{
const cv::Matx44d& m = affine.matrix;
cv::Vec3d r;
r.val[0] = m.val[0] * v[0] + m.val[1] * v[1] + m.val[ 2] * v[2] + m.val[ 3];
r.val[1] = m.val[4] * v[0] + m.val[5] * v[1] + m.val[ 6] * v[2] + m.val[ 7];
r.val[2] = m.val[8] * v[0] + m.val[9] * v[1] + m.val[10] * v[2] + m.val[11];
return r;
}
#if defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H
template<typename T> inline
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>& affine)
{
cv::Mat(4, 4, cv::traits::Type<T>::value, affine.matrix().data()).copyTo(matrix);
}
template<typename T> inline
cv::Affine3<T>::Affine3(const Eigen::Transform<T, 3, Eigen::Affine>& affine)
{
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> a = affine;
cv::Mat(4, 4, cv::traits::Type<T>::value, a.matrix().data()).copyTo(matrix);
}
template<typename T> inline
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>() const
{
Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)> r;
cv::Mat hdr(4, 4, cv::traits::Type<T>::value, r.matrix().data());
cv::Mat(matrix, false).copyTo(hdr);
return r;
}
template<typename T> inline
cv::Affine3<T>::operator Eigen::Transform<T, 3, Eigen::Affine>() const
{
return this->operator Eigen::Transform<T, 3, Eigen::Affine, (Eigen::RowMajor)>();
}
#endif /* defined EIGEN_WORLD_VERSION && defined EIGEN_GEOMETRY_MODULE_H */
//! @endcond
#endif /* __cplusplus */
#endif /* OPENCV_CORE_AFFINE3_HPP */

View File

@ -0,0 +1,762 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2014, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_BASE_HPP
#define OPENCV_CORE_BASE_HPP
#ifndef __cplusplus
# error base.hpp header must be compiled as C++
#endif
#include "opencv2/opencv_modules.hpp"
#include <climits>
#include <algorithm>
#include "opencv2/core/cvdef.h"
#include "opencv2/core/cvstd.hpp"
namespace cv
{
//! @addtogroup core_utils
//! @{
namespace Error {
//! error codes
enum Code {
StsOk= 0, //!< everything is ok
StsBackTrace= -1, //!< pseudo error for back trace
StsError= -2, //!< unknown /unspecified error
StsInternal= -3, //!< internal error (bad state)
StsNoMem= -4, //!< insufficient memory
StsBadArg= -5, //!< function arg/param is bad
StsBadFunc= -6, //!< unsupported function
StsNoConv= -7, //!< iteration didn't converge
StsAutoTrace= -8, //!< tracing
HeaderIsNull= -9, //!< image header is NULL
BadImageSize= -10, //!< image size is invalid
BadOffset= -11, //!< offset is invalid
BadDataPtr= -12, //!<
BadStep= -13, //!< image step is wrong, this may happen for a non-continuous matrix.
BadModelOrChSeq= -14, //!<
BadNumChannels= -15, //!< bad number of channels, for example, some functions accept only single channel matrices.
BadNumChannel1U= -16, //!<
BadDepth= -17, //!< input image depth is not supported by the function
BadAlphaChannel= -18, //!<
BadOrder= -19, //!< number of dimensions is out of range
BadOrigin= -20, //!< incorrect input origin
BadAlign= -21, //!< incorrect input align
BadCallBack= -22, //!<
BadTileSize= -23, //!<
BadCOI= -24, //!< input COI is not supported
BadROISize= -25, //!< incorrect input roi
MaskIsTiled= -26, //!<
StsNullPtr= -27, //!< null pointer
StsVecLengthErr= -28, //!< incorrect vector length
StsFilterStructContentErr= -29, //!< incorrect filter structure content
StsKernelStructContentErr= -30, //!< incorrect transform kernel content
StsFilterOffsetErr= -31, //!< incorrect filter offset value
StsBadSize= -201, //!< the input/output structure size is incorrect
StsDivByZero= -202, //!< division by zero
StsInplaceNotSupported= -203, //!< in-place operation is not supported
StsObjectNotFound= -204, //!< request can't be completed
StsUnmatchedFormats= -205, //!< formats of input/output arrays differ
StsBadFlag= -206, //!< flag is wrong or not supported
StsBadPoint= -207, //!< bad CvPoint
StsBadMask= -208, //!< bad format of mask (neither 8uC1 nor 8sC1)
StsUnmatchedSizes= -209, //!< sizes of input/output structures do not match
StsUnsupportedFormat= -210, //!< the data format/type is not supported by the function
StsOutOfRange= -211, //!< some of parameters are out of range
StsParseError= -212, //!< invalid syntax/structure of the parsed file
StsNotImplemented= -213, //!< the requested function/feature is not implemented
StsBadMemBlock= -214, //!< an allocated block has been corrupted
StsAssert= -215, //!< assertion failed
GpuNotSupported= -216, //!< no CUDA support
GpuApiCallError= -217, //!< GPU API call error
OpenGlNotSupported= -218, //!< no OpenGL support
OpenGlApiCallError= -219, //!< OpenGL API call error
OpenCLApiCallError= -220, //!< OpenCL API call error
OpenCLDoubleNotSupported= -221,
OpenCLInitError= -222, //!< OpenCL initialization error
OpenCLNoAMDBlasFft= -223
};
} //Error
//! @} core_utils
//! @addtogroup core_array
//! @{
//! matrix decomposition types
enum DecompTypes {
/** Gaussian elimination with the optimal pivot element chosen. */
DECOMP_LU = 0,
/** singular value decomposition (SVD) method; the system can be over-defined and/or the matrix
src1 can be singular */
DECOMP_SVD = 1,
/** eigenvalue decomposition; the matrix src1 must be symmetrical */
DECOMP_EIG = 2,
/** Cholesky \f$LL^T\f$ factorization; the matrix src1 must be symmetrical and positively
defined */
DECOMP_CHOLESKY = 3,
/** QR factorization; the system can be over-defined and/or the matrix src1 can be singular */
DECOMP_QR = 4,
/** while all the previous flags are mutually exclusive, this flag can be used together with
any of the previous; it means that the normal equations
\f$\texttt{src1}^T\cdot\texttt{src1}\cdot\texttt{dst}=\texttt{src1}^T\texttt{src2}\f$ are
solved instead of the original system
\f$\texttt{src1}\cdot\texttt{dst}=\texttt{src2}\f$ */
DECOMP_NORMAL = 16
};
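/** Illustrative use of the decomposition flags (editor's sketch, assuming the usual
cv::solve API from this module): solving an over-determined system in the
least-squares sense, directly with SVD or via the normal equations.
@code
    cv::Mat A(5, 3, CV_64F), b(5, 1, CV_64F), x;
    cv::randu(A, cv::Scalar(-1), cv::Scalar(1));
    cv::randu(b, cv::Scalar(-1), cv::Scalar(1));
    cv::solve(A, b, x, cv::DECOMP_SVD);                    // least-squares solution
    cv::solve(A, b, x, cv::DECOMP_QR | cv::DECOMP_NORMAL); // QR on A^T A x = A^T b
@endcode
*/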
/** norm types
src1 and src2 denote input arrays.
*/
enum NormTypes {
/**
\f[
norm = \forkthree
{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) }
{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_INF}\) }
{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_INF}\) }
\f]
*/
NORM_INF = 1,
/**
\f[
norm = \forkthree
{\| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\)}
{ \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if \(\texttt{normType} = \texttt{NORM_L1}\) }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L1}\) }
\f]*/
NORM_L1 = 2,
/**
\f[
norm = \forkthree
{ \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if \(\texttt{normType} = \texttt{NORM_L2}\) }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
\f]
*/
NORM_L2 = 4,
/**
\f[
norm = \forkthree
{ \| \texttt{src1} \| _{L_2} ^{2} = \sum_I \texttt{src1}(I)^2} {if \(\texttt{normType} = \texttt{NORM_L2SQR}\)}
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} ^{2} = \sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2 }{if \(\texttt{normType} = \texttt{NORM_L2SQR}\) }
{ \left(\frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}}\right)^2 }{if \(\texttt{normType} = \texttt{NORM_RELATIVE | NORM_L2}\) }
\f]
*/
NORM_L2SQR = 5,
/**
    In the case of one input array, calculates the Hamming distance of the array from zero.
    In the case of two input arrays, calculates the Hamming distance between the arrays.
*/
NORM_HAMMING = 6,
/**
    Similar to NORM_HAMMING, but in the calculation every two bits of the input sequence are
    added together and treated as a single bit, which is then used in the same calculation as NORM_HAMMING.
*/
NORM_HAMMING2 = 7,
NORM_TYPE_MASK = 7, //!< bit-mask which can be used to separate norm type from norm flags
NORM_RELATIVE = 8, //!< flag
NORM_MINMAX = 32 //!< flag
};
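/** Illustrative use of the norm flags (editor's sketch, assuming the usual cv::norm
API): absolute and relative L2 error between two arrays.
@code
    cv::Mat a = (cv::Mat_<float>(1, 3) << 1, 2, 3);
    cv::Mat b = (cv::Mat_<float>(1, 3) << 1, 2, 4);
    double abs_err = cv::norm(a, b, cv::NORM_L2);                     // = 1
    double rel_err = cv::norm(a, b, cv::NORM_RELATIVE | cv::NORM_L2); // = 1 / ||b||_L2
@endcode
*/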
//! comparison types
enum CmpTypes { CMP_EQ = 0, //!< src1 is equal to src2.
CMP_GT = 1, //!< src1 is greater than src2.
CMP_GE = 2, //!< src1 is greater than or equal to src2.
CMP_LT = 3, //!< src1 is less than src2.
CMP_LE = 4, //!< src1 is less than or equal to src2.
CMP_NE = 5 //!< src1 is unequal to src2.
};
//! generalized matrix multiplication flags
enum GemmFlags { GEMM_1_T = 1, //!< transposes src1
GEMM_2_T = 2, //!< transposes src2
GEMM_3_T = 4 //!< transposes src3
};
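/** Illustrative use of the comparison and GEMM flags (editor's sketch, assuming the
usual cv::compare and cv::gemm APIs):
@code
    cv::Mat A = cv::Mat::eye(3, 3, CV_32F), B = cv::Mat::ones(3, 3, CV_32F);
    cv::Mat mask, C;
    cv::compare(A, B, mask, cv::CMP_GE);                       // mask = 255 where A >= B
    cv::gemm(A, B, 1.0, cv::noArray(), 0.0, C, cv::GEMM_2_T);  // C = 1.0 * A * B^T
@endcode
*/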
enum DftFlags {
/** performs an inverse 1D or 2D transform instead of the default forward
transform. */
DFT_INVERSE = 1,
/** scales the result: divide it by the number of array elements. Normally, it is
combined with DFT_INVERSE. */
DFT_SCALE = 2,
/** performs a forward or inverse transform of every individual row of the input
matrix; this flag enables you to transform multiple vectors simultaneously and can be used to
decrease the overhead (which is sometimes several times larger than the processing itself) to
perform 3D and higher-dimensional transformations and so forth.*/
DFT_ROWS = 4,
/** performs a forward transformation of 1D or 2D real array; the result,
though being a complex array, has complex-conjugate symmetry (*CCS*, see the function
description below for details), and such an array can be packed into a real array of the same
size as input, which is the fastest option and which is what the function does by default;
however, you may wish to get a full complex array (for simpler spectrum analysis, and so on) -
pass the flag to enable the function to produce a full-size complex output array. */
DFT_COMPLEX_OUTPUT = 16,
/** performs an inverse transformation of a 1D or 2D complex array; the
result is normally a complex array of the same size, however, if the input array has
conjugate-complex symmetry (for example, it is a result of forward transformation with
DFT_COMPLEX_OUTPUT flag), the output is a real array; while the function itself does not
check whether the input is symmetrical or not, you can pass the flag and then the function
will assume the symmetry and produce the real output array (note that when the input is packed
into a real array and inverse transformation is executed, the function treats the input as a
packed complex-conjugate symmetrical array, and the output will also be a real array). */
DFT_REAL_OUTPUT = 32,
    /** specifies that the input is complex. If this flag is set, the input must have 2 channels.
    For backwards-compatibility reasons, an input that already has 2 channels is
    considered complex even without this flag. */
DFT_COMPLEX_INPUT = 64,
/** performs an inverse 1D or 2D transform instead of the default forward transform. */
DCT_INVERSE = DFT_INVERSE,
/** performs a forward or inverse transform of every individual row of the input
matrix. This flag enables you to transform multiple vectors simultaneously and can be used to
decrease the overhead (which is sometimes several times larger than the processing itself) to
perform 3D and higher-dimensional transforms and so forth.*/
DCT_ROWS = DFT_ROWS
};
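/** Illustrative use of the DFT flags (editor's sketch, assuming the usual cv::dft API):
a forward transform with full complex output, then the inverse back to a real,
properly scaled array.
@code
    cv::Mat src(1, 8, CV_32F), spec, back;
    cv::randu(src, cv::Scalar(0), cv::Scalar(1));
    cv::dft(src, spec, cv::DFT_COMPLEX_OUTPUT);
    cv::dft(spec, back, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
    // back ~ src
@endcode
*/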
//! Various border types, image boundaries are denoted with `|`
//! @see borderInterpolate, copyMakeBorder
enum BorderTypes {
BORDER_CONSTANT = 0, //!< `iiiiii|abcdefgh|iiiiiii` with some specified `i`
BORDER_REPLICATE = 1, //!< `aaaaaa|abcdefgh|hhhhhhh`
BORDER_REFLECT = 2, //!< `fedcba|abcdefgh|hgfedcb`
BORDER_WRAP = 3, //!< `cdefgh|abcdefgh|abcdefg`
BORDER_REFLECT_101 = 4, //!< `gfedcb|abcdefgh|gfedcba`
BORDER_TRANSPARENT = 5, //!< `uvwxyz|abcdefgh|ijklmno`
BORDER_REFLECT101 = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
BORDER_DEFAULT = BORDER_REFLECT_101, //!< same as BORDER_REFLECT_101
BORDER_ISOLATED = 16 //!< do not look outside of ROI
};
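/** Illustrative use of the border types (editor's sketch, assuming the usual
cv::copyMakeBorder API): pad an image by 2 pixels on every side.
@code
    cv::Mat img(4, 4, CV_8UC1, cv::Scalar(7)), padded;
    cv::copyMakeBorder(img, padded, 2, 2, 2, 2, cv::BORDER_REFLECT_101);
    cv::copyMakeBorder(img, padded, 2, 2, 2, 2, cv::BORDER_CONSTANT, cv::Scalar(0));
@endcode
*/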
//! @} core_array
//! @addtogroup core_utils
//! @{
//! @cond IGNORED
//////////////// static assert /////////////////
#define CVAUX_CONCAT_EXP(a, b) a##b
#define CVAUX_CONCAT(a, b) CVAUX_CONCAT_EXP(a,b)
#if defined(__clang__)
# ifndef __has_extension
# define __has_extension __has_feature /* compatibility, for older versions of clang */
# endif
# if __has_extension(cxx_static_assert)
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
# elif __has_extension(c_static_assert)
# define CV_StaticAssert(condition, reason) _Static_assert((condition), reason " " #condition)
# endif
#elif defined(__GNUC__)
# if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L)
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
# endif
#elif defined(_MSC_VER)
# if _MSC_VER >= 1600 /* MSVC 10 */
# define CV_StaticAssert(condition, reason) static_assert((condition), reason " " #condition)
# endif
#endif
#ifndef CV_StaticAssert
# if !defined(__clang__) && defined(__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 302)
# define CV_StaticAssert(condition, reason) ({ extern int __attribute__((error("CV_StaticAssert: " reason " " #condition))) CV_StaticAssert(); ((condition) ? 0 : CV_StaticAssert()); })
# else
template <bool x> struct CV_StaticAssert_failed;
template <> struct CV_StaticAssert_failed<true> { enum { val = 1 }; };
template<int x> struct CV_StaticAssert_test {};
# define CV_StaticAssert(condition, reason)\
typedef cv::CV_StaticAssert_test< sizeof(cv::CV_StaticAssert_failed< static_cast<bool>(condition) >) > CVAUX_CONCAT(CV_StaticAssert_failed_at_, __LINE__)
# endif
#endif
// Suppress warning "-Wdeprecated-declarations" / C4996
#if defined(_MSC_VER)
#define CV_DO_PRAGMA(x) __pragma(x)
#elif defined(__GNUC__)
#define CV_DO_PRAGMA(x) _Pragma (#x)
#else
#define CV_DO_PRAGMA(x)
#endif
#ifdef _MSC_VER
#define CV_SUPPRESS_DEPRECATED_START \
CV_DO_PRAGMA(warning(push)) \
CV_DO_PRAGMA(warning(disable: 4996))
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(warning(pop))
#elif defined (__clang__) || ((__GNUC__) && (__GNUC__*100 + __GNUC_MINOR__ > 405))
#define CV_SUPPRESS_DEPRECATED_START \
CV_DO_PRAGMA(GCC diagnostic push) \
CV_DO_PRAGMA(GCC diagnostic ignored "-Wdeprecated-declarations")
#define CV_SUPPRESS_DEPRECATED_END CV_DO_PRAGMA(GCC diagnostic pop)
#else
#define CV_SUPPRESS_DEPRECATED_START
#define CV_SUPPRESS_DEPRECATED_END
#endif
#define CV_UNUSED(name) (void)name
#if defined __GNUC__ && !defined __EXCEPTIONS
#define CV_TRY
#define CV_CATCH(A, B) for (A B; false; )
#define CV_CATCH_ALL if (false)
#define CV_THROW(A) abort()
#define CV_RETHROW() abort()
#else
#define CV_TRY try
#define CV_CATCH(A, B) catch(const A & B)
#define CV_CATCH_ALL catch(...)
#define CV_THROW(A) throw A
#define CV_RETHROW() throw
#endif
//! @endcond
/*! @brief Signals an error and raises the exception.
By default the function prints information about the error to stderr,
then it either stops (if setBreakOnError() had been called before) or raises the exception.
It is possible to alter error processing by using redirectError().
@param _code - error code (Error::Code)
@param _err - error description
@param _func - function name. Available only when the compiler supports getting it
@param _file - source file name where the error has occurred
@param _line - line number in the source file where the error has occurred
@see CV_Error, CV_Error_, CV_ErrorNoReturn, CV_ErrorNoReturn_, CV_Assert, CV_DbgAssert
*/
CV_EXPORTS void error(int _code, const String& _err, const char* _func, const char* _file, int _line);
#ifdef __GNUC__
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Winvalid-noreturn"
# endif
#endif
/** same as cv::error, but does not return */
CV_INLINE CV_NORETURN void errorNoReturn(int _code, const String& _err, const char* _func, const char* _file, int _line)
{
error(_code, _err, _func, _file, _line);
#ifdef __GNUC__
# if !defined __clang__ && !defined __APPLE__
// this suppresses this warning: "noreturn" function does return [enabled by default]
__builtin_trap();
// or use infinite loop: for (;;) {}
# endif
#endif
}
#ifdef __GNUC__
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic pop
# endif
#endif
#if defined __GNUC__
#define CV_Func __func__
#elif defined _MSC_VER
#define CV_Func __FUNCTION__
#else
#define CV_Func ""
#endif
#ifdef CV_STATIC_ANALYSIS
// In practice, some macros are not processed correctly (noreturn is not detected).
// We need to use a simplified definition for them.
#define CV_Error(...) do { abort(); } while (0)
#define CV_Error_( code, args ) do { cv::format args; abort(); } while (0)
#define CV_ErrorNoReturn(...) do { abort(); } while (0)
#define CV_ErrorNoReturn_(...) do { abort(); } while (0)
#define CV_Assert_1( expr ) do { if (!(expr)) abort(); } while (0)
#else // CV_STATIC_ANALYSIS
/** @brief Call the error handler.
Currently, the error handler prints the error code and the error message to the standard
error stream `stderr`. In the Debug configuration, it then provokes a memory access violation, so that
the execution stack and all the parameters can be analyzed by the debugger. In the Release
configuration, the exception is thrown.
@param code one of Error::Code
@param msg error message
*/
#define CV_Error( code, msg ) cv::error( code, msg, CV_Func, __FILE__, __LINE__ )
/** @brief Call the error handler.
This macro can be used to construct an error message on the fly to include some dynamic information,
for example:
@code
// note the extra parentheses around the formatted text message
CV_Error_( CV_StsOutOfRange,
("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
@endcode
@param code one of Error::Code
@param args printf-like formatted error message in parentheses
*/
#define CV_Error_( code, args ) cv::error( code, cv::format args, CV_Func, __FILE__, __LINE__ )
/** same as CV_Error(code,msg), but does not return */
#define CV_ErrorNoReturn( code, msg ) cv::errorNoReturn( code, msg, CV_Func, __FILE__, __LINE__ )
/** same as CV_Error_(code,args), but does not return */
#define CV_ErrorNoReturn_( code, args ) cv::errorNoReturn( code, cv::format args, CV_Func, __FILE__, __LINE__ )
#define CV_Assert_1( expr ) if(!!(expr)) ; else cv::error( cv::Error::StsAssert, #expr, CV_Func, __FILE__, __LINE__ )
#endif // CV_STATIC_ANALYSIS
#define CV_Assert_2( expr1, expr2 ) CV_Assert_1(expr1); CV_Assert_1(expr2)
#define CV_Assert_3( expr1, expr2, expr3 ) CV_Assert_2(expr1, expr2); CV_Assert_1(expr3)
#define CV_Assert_4( expr1, expr2, expr3, expr4 ) CV_Assert_3(expr1, expr2, expr3); CV_Assert_1(expr4)
#define CV_Assert_5( expr1, expr2, expr3, expr4, expr5 ) CV_Assert_4(expr1, expr2, expr3, expr4); CV_Assert_1(expr5)
#define CV_Assert_6( expr1, expr2, expr3, expr4, expr5, expr6 ) CV_Assert_5(expr1, expr2, expr3, expr4, expr5); CV_Assert_1(expr6)
#define CV_Assert_7( expr1, expr2, expr3, expr4, expr5, expr6, expr7 ) CV_Assert_6(expr1, expr2, expr3, expr4, expr5, expr6 ); CV_Assert_1(expr7)
#define CV_Assert_8( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8 ) CV_Assert_7(expr1, expr2, expr3, expr4, expr5, expr6, expr7 ); CV_Assert_1(expr8)
#define CV_Assert_9( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9 ) CV_Assert_8(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8 ); CV_Assert_1(expr9)
#define CV_Assert_10( expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9, expr10 ) CV_Assert_9(expr1, expr2, expr3, expr4, expr5, expr6, expr7, expr8, expr9 ); CV_Assert_1(expr10)
#define CV_VA_NUM_ARGS_HELPER(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
#define CV_VA_NUM_ARGS(...) CV_VA_NUM_ARGS_HELPER(__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
/** @brief Checks a condition at runtime and throws exception if it fails
The macros CV_Assert (and CV_DbgAssert(expr)) evaluate the specified expression. If it is 0, the macros
raise an error (see cv::error). The macro CV_Assert checks the condition in both Debug and Release
configurations while CV_DbgAssert is only retained in the Debug configuration.
*/
#define CV_Assert(...) do { CVAUX_CONCAT(CV_Assert_, CV_VA_NUM_ARGS(__VA_ARGS__)) (__VA_ARGS__); } while(0)
/** replaced with CV_Assert(expr) in Debug configuration */
#ifdef _DEBUG
# define CV_DbgAssert(expr) CV_Assert(expr)
#else
# define CV_DbgAssert(expr)
#endif
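/** Illustrative use (editor's sketch): CV_Assert accepts one or several expressions;
each failing expression reports its own text via cv::error.
@code
    void set_roi(const cv::Mat& m, cv::Rect r)
    {
        CV_Assert(!m.empty(), r.width > 0, r.height > 0);
        CV_DbgAssert(m.type() == CV_8UC3);  // checked in Debug builds only
    }
@endcode
*/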
/*
 * Hamming distance functor - counts the bit differences between two strings - useful for the BRIEF descriptor;
 * computes the bit count of A XOR'ed with B
*/
struct CV_EXPORTS Hamming
{
enum { normType = NORM_HAMMING };
typedef unsigned char ValueType;
typedef int ResultType;
/** this will count the bits in a ^ b
*/
ResultType operator()( const unsigned char* a, const unsigned char* b, int size ) const;
};
typedef Hamming HammingLUT;
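/** Illustrative use of the Hamming functor (editor's sketch): counting the differing
bits between two 4-byte binary descriptors.
@code
    unsigned char d1[4] = { 0xFF, 0x0F, 0x00, 0xAA };
    unsigned char d2[4] = { 0xF0, 0x0F, 0x01, 0xAA };
    cv::Hamming h;
    int dist = h(d1, d2, 4);  // popcount(0x0F) + 0 + popcount(0x01) + 0 = 5
@endcode
*/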
/////////////////////////////////// inline norms ////////////////////////////////////
template<typename _Tp> inline _Tp cv_abs(_Tp x) { return std::abs(x); }
inline int cv_abs(uchar x) { return x; }
inline int cv_abs(schar x) { return std::abs(x); }
inline int cv_abs(ushort x) { return x; }
inline int cv_abs(short x) { return std::abs(x); }
template<typename _Tp, typename _AccTp> static inline
_AccTp normL2Sqr(const _Tp* a, int n)
{
_AccTp s = 0;
int i=0;
#if CV_ENABLE_UNROLLED
for( ; i <= n - 4; i += 4 )
{
_AccTp v0 = a[i], v1 = a[i+1], v2 = a[i+2], v3 = a[i+3];
s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
}
#endif
for( ; i < n; i++ )
{
_AccTp v = a[i];
s += v*v;
}
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normL1(const _Tp* a, int n)
{
_AccTp s = 0;
int i = 0;
#if CV_ENABLE_UNROLLED
for(; i <= n - 4; i += 4 )
{
s += (_AccTp)cv_abs(a[i]) + (_AccTp)cv_abs(a[i+1]) +
(_AccTp)cv_abs(a[i+2]) + (_AccTp)cv_abs(a[i+3]);
}
#endif
for( ; i < n; i++ )
s += cv_abs(a[i]);
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normInf(const _Tp* a, int n)
{
_AccTp s = 0;
for( int i = 0; i < n; i++ )
s = std::max(s, (_AccTp)cv_abs(a[i]));
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normL2Sqr(const _Tp* a, const _Tp* b, int n)
{
_AccTp s = 0;
int i= 0;
#if CV_ENABLE_UNROLLED
for(; i <= n - 4; i += 4 )
{
_AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
s += v0*v0 + v1*v1 + v2*v2 + v3*v3;
}
#endif
for( ; i < n; i++ )
{
_AccTp v = _AccTp(a[i] - b[i]);
s += v*v;
}
return s;
}
static inline float normL2Sqr(const float* a, const float* b, int n)
{
float s = 0.f;
for( int i = 0; i < n; i++ )
{
float v = a[i] - b[i];
s += v*v;
}
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normL1(const _Tp* a, const _Tp* b, int n)
{
_AccTp s = 0;
int i= 0;
#if CV_ENABLE_UNROLLED
for(; i <= n - 4; i += 4 )
{
_AccTp v0 = _AccTp(a[i] - b[i]), v1 = _AccTp(a[i+1] - b[i+1]), v2 = _AccTp(a[i+2] - b[i+2]), v3 = _AccTp(a[i+3] - b[i+3]);
s += std::abs(v0) + std::abs(v1) + std::abs(v2) + std::abs(v3);
}
#endif
for( ; i < n; i++ )
{
_AccTp v = _AccTp(a[i] - b[i]);
s += std::abs(v);
}
return s;
}
inline float normL1(const float* a, const float* b, int n)
{
float s = 0.f;
for( int i = 0; i < n; i++ )
{
s += std::abs(a[i] - b[i]);
}
return s;
}
inline int normL1(const uchar* a, const uchar* b, int n)
{
int s = 0;
for( int i = 0; i < n; i++ )
{
s += std::abs(a[i] - b[i]);
}
return s;
}
template<typename _Tp, typename _AccTp> static inline
_AccTp normInf(const _Tp* a, const _Tp* b, int n)
{
_AccTp s = 0;
for( int i = 0; i < n; i++ )
{
_AccTp v0 = a[i] - b[i];
s = std::max(s, std::abs(v0));
}
return s;
}
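/** Illustrative use of the inline norm helpers above (editor's sketch): they operate
on raw pointers and accumulate in the second template argument's type.
@code
    float a[4] = { 1.f, 2.f, 3.f, 4.f };
    float b[4] = { 1.f, 2.f, 2.f, 6.f };
    float l2s = cv::normL2Sqr<float, float>(a, b, 4); // 0 + 0 + 1 + 4 = 5
    float l1  = cv::normL1<float, float>(a, b, 4);    // 0 + 0 + 1 + 2 = 3
    float li  = cv::normInf<float, float>(a, b, 4);   // max |a_i - b_i| = 2
@endcode
*/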
/** @brief Computes the cube root of an argument.
The function cubeRoot computes \f$\sqrt[3]{\texttt{val}}\f$. Negative arguments are handled correctly.
NaN and Inf are not handled. The accuracy approaches the maximum possible accuracy for
single-precision data.
@param val A function argument.
*/
CV_EXPORTS_W float cubeRoot(float val);
/** @brief Calculates the angle of a 2D vector in degrees.
The function fastAtan2 calculates the full-range angle of an input 2D vector. The angle is measured
in degrees and varies from 0 to 360 degrees. The accuracy is about 0.3 degrees.
@param x x-coordinate of the vector.
@param y y-coordinate of the vector.
*/
CV_EXPORTS_W float fastAtan2(float y, float x);
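/** Illustrative use (editor's sketch): both helpers are cheap scalar approximations.
@code
    float r = cv::cubeRoot(-27.0f);       // = -3
    float a = cv::fastAtan2(1.0f, 1.0f);  // ~45 degrees (accuracy ~0.3 deg)
@endcode
*/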
/** proxy for hal::LU */
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
/** proxy for hal::LU */
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
/** proxy for hal::Cholesky */
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
/** proxy for hal::Cholesky */
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
////////////////// forward declarations for important OpenCV types //////////////////
//! @cond IGNORED
template<typename _Tp, int cn> class Vec;
template<typename _Tp, int m, int n> class Matx;
template<typename _Tp> class Complex;
template<typename _Tp> class Point_;
template<typename _Tp> class Point3_;
template<typename _Tp> class Size_;
template<typename _Tp> class Rect_;
template<typename _Tp> class Scalar_;
class CV_EXPORTS RotatedRect;
class CV_EXPORTS Range;
class CV_EXPORTS TermCriteria;
class CV_EXPORTS KeyPoint;
class CV_EXPORTS DMatch;
class CV_EXPORTS RNG;
class CV_EXPORTS Mat;
class CV_EXPORTS MatExpr;
class CV_EXPORTS UMat;
class CV_EXPORTS SparseMat;
typedef Mat MatND;
template<typename _Tp> class Mat_;
template<typename _Tp> class SparseMat_;
class CV_EXPORTS MatConstIterator;
class CV_EXPORTS SparseMatIterator;
class CV_EXPORTS SparseMatConstIterator;
template<typename _Tp> class MatIterator_;
template<typename _Tp> class MatConstIterator_;
template<typename _Tp> class SparseMatIterator_;
template<typename _Tp> class SparseMatConstIterator_;
namespace ogl
{
class CV_EXPORTS Buffer;
class CV_EXPORTS Texture2D;
class CV_EXPORTS Arrays;
}
namespace cuda
{
class CV_EXPORTS GpuMat;
class CV_EXPORTS HostMem;
class CV_EXPORTS Stream;
class CV_EXPORTS Event;
}
namespace cudev
{
template <typename _Tp> class GpuMat_;
}
namespace ipp
{
#if OPENCV_ABI_COMPATIBILITY > 300
CV_EXPORTS unsigned long long getIppFeatures();
#else
CV_EXPORTS int getIppFeatures();
#endif
CV_EXPORTS void setIppStatus(int status, const char * const funcname = NULL, const char * const filename = NULL,
int line = 0);
CV_EXPORTS int getIppStatus();
CV_EXPORTS String getIppErrorLocation();
CV_EXPORTS_W bool useIPP();
CV_EXPORTS_W void setUseIPP(bool flag);
CV_EXPORTS_W String getIppVersion();
// IPP Not-Exact mode. This function may force use of IPP when both IPP and OpenCV provide proper results
// but have internal accuracy differences which have too much direct or indirect impact on accuracy tests.
CV_EXPORTS_W bool useIPP_NE();
CV_EXPORTS_W void setUseIPP_NE(bool flag);
} // ipp
//! @endcond
//! @} core_utils
} // cv
#include "opencv2/core/neon_utils.hpp"
#include "opencv2/core/vsx_utils.hpp"
#endif //OPENCV_CORE_BASE_HPP

View File

@ -0,0 +1,40 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
#ifndef OPENCV_CORE_BUFFER_POOL_HPP
#define OPENCV_CORE_BUFFER_POOL_HPP
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable: 4265)
#endif
namespace cv
{
//! @addtogroup core
//! @{
class BufferPoolController
{
protected:
~BufferPoolController() { }
public:
virtual size_t getReservedSize() const = 0;
virtual size_t getMaxReservedSize() const = 0;
virtual void setMaxReservedSize(size_t size) = 0;
virtual void freeAllReservedBuffers() = 0;
};
//! @}
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#endif // OPENCV_CORE_BUFFER_POOL_HPP

View File

@ -0,0 +1,48 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef __OPENCV_BUILD
#error this is a compatibility header which should not be used inside the OpenCV library
#endif
#include "opencv2/core.hpp"

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,631 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CUDAINL_HPP
#define OPENCV_CORE_CUDAINL_HPP
#include "opencv2/core/cuda.hpp"
//! @cond IGNORED
namespace cv { namespace cuda {
//===================================================================================
// GpuMat
//===================================================================================
inline
GpuMat::GpuMat(Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
GpuMat::GpuMat(Size size_, int type_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (rows_ > 0 && cols_ > 0)
{
create(rows_, cols_, type_);
setTo(s_);
}
}
inline
GpuMat::GpuMat(Size size_, int type_, Scalar s_, Allocator* allocator_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
if (size_.height > 0 && size_.width > 0)
{
create(size_.height, size_.width, type_);
setTo(s_);
}
}
inline
GpuMat::GpuMat(const GpuMat& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), allocator(m.allocator)
{
if (refcount)
CV_XADD(refcount, 1);
}
inline
GpuMat::GpuMat(InputArray arr, Allocator* allocator_) :
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), allocator(allocator_)
{
upload(arr);
}
inline
GpuMat::~GpuMat()
{
release();
}
inline
GpuMat& GpuMat::operator =(const GpuMat& m)
{
if (this != &m)
{
GpuMat temp(m);
swap(temp);
}
return *this;
}
inline
void GpuMat::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline
void GpuMat::swap(GpuMat& b)
{
std::swap(flags, b.flags);
std::swap(rows, b.rows);
std::swap(cols, b.cols);
std::swap(step, b.step);
std::swap(data, b.data);
std::swap(datastart, b.datastart);
std::swap(dataend, b.dataend);
std::swap(refcount, b.refcount);
std::swap(allocator, b.allocator);
}
inline
GpuMat GpuMat::clone() const
{
GpuMat m;
copyTo(m);
return m;
}
inline
void GpuMat::copyTo(OutputArray dst, InputArray mask) const
{
copyTo(dst, mask, Stream::Null());
}
inline
GpuMat& GpuMat::setTo(Scalar s)
{
return setTo(s, Stream::Null());
}
inline
GpuMat& GpuMat::setTo(Scalar s, InputArray mask)
{
return setTo(s, mask, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype) const
{
convertTo(dst, rtype, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, double beta) const
{
convertTo(dst, rtype, alpha, beta, Stream::Null());
}
inline
void GpuMat::convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const
{
convertTo(dst, rtype, alpha, 0.0, stream);
}
inline
void GpuMat::assignTo(GpuMat& m, int _type) const
{
if (_type < 0)
m = *this;
else
convertTo(m, _type);
}
inline
uchar* GpuMat::ptr(int y)
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
return data + step * y;
}
inline
const uchar* GpuMat::ptr(int y) const
{
CV_DbgAssert( (unsigned)y < (unsigned)rows );
return data + step * y;
}
template<typename _Tp> inline
_Tp* GpuMat::ptr(int y)
{
return (_Tp*)ptr(y);
}
template<typename _Tp> inline
const _Tp* GpuMat::ptr(int y) const
{
return (const _Tp*)ptr(y);
}
template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
return PtrStepSz<T>(rows, cols, (T*)data, step);
}
template <class T> inline
GpuMat::operator PtrStep<T>() const
{
return PtrStep<T>((T*)data, step);
}
inline
GpuMat GpuMat::row(int y) const
{
return GpuMat(*this, Range(y, y+1), Range::all());
}
inline
GpuMat GpuMat::col(int x) const
{
return GpuMat(*this, Range::all(), Range(x, x+1));
}
inline
GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
return GpuMat(*this, Range(startrow, endrow), Range::all());
}
inline
GpuMat GpuMat::rowRange(Range r) const
{
return GpuMat(*this, r, Range::all());
}
inline
GpuMat GpuMat::colRange(int startcol, int endcol) const
{
return GpuMat(*this, Range::all(), Range(startcol, endcol));
}
inline
GpuMat GpuMat::colRange(Range r) const
{
return GpuMat(*this, Range::all(), r);
}
inline
GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
{
return GpuMat(*this, rowRange_, colRange_);
}
inline
GpuMat GpuMat::operator ()(Rect roi) const
{
return GpuMat(*this, roi);
}
inline
bool GpuMat::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t GpuMat::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline
size_t GpuMat::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline
int GpuMat::type() const
{
return CV_MAT_TYPE(flags);
}
inline
int GpuMat::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline
int GpuMat::channels() const
{
return CV_MAT_CN(flags);
}
inline
size_t GpuMat::step1() const
{
return step / elemSize1();
}
inline
Size GpuMat::size() const
{
return Size(cols, rows);
}
inline
bool GpuMat::empty() const
{
return data == 0;
}
static inline
GpuMat createContinuous(int rows, int cols, int type)
{
GpuMat m;
createContinuous(rows, cols, type, m);
return m;
}
static inline
void createContinuous(Size size, int type, OutputArray arr)
{
createContinuous(size.height, size.width, type, arr);
}
static inline
GpuMat createContinuous(Size size, int type)
{
GpuMat m;
createContinuous(size, type, m);
return m;
}
static inline
void ensureSizeIsEnough(Size size, int type, OutputArray arr)
{
ensureSizeIsEnough(size.height, size.width, type, arr);
}
static inline
void swap(GpuMat& a, GpuMat& b)
{
a.swap(b);
}
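// Illustrative round trip (editor's sketch, assuming a CUDA-enabled build and the
// GpuMat API declared in cuda.hpp): upload a host Mat, take a zero-copy view,
// download the result back to the host.
//
//   cv::Mat host(480, 640, CV_8UC1, cv::Scalar(0)), back;
//   cv::cuda::GpuMat dev;
//   dev.upload(host);                                      // host -> device copy
//   cv::cuda::GpuMat roi = dev(cv::Rect(0, 0, 320, 240));  // view, no copy
//   dev.download(back);                                    // device -> host copy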
//===================================================================================
// HostMem
//===================================================================================
inline
HostMem::HostMem(AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
}
inline
HostMem::HostMem(const HostMem& m)
: flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
{
if( refcount )
CV_XADD(refcount, 1);
}
inline
HostMem::HostMem(int rows_, int cols_, int type_, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
if (rows_ > 0 && cols_ > 0)
create(rows_, cols_, type_);
}
inline
HostMem::HostMem(Size size_, int type_, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
if (size_.height > 0 && size_.width > 0)
create(size_.height, size_.width, type_);
}
inline
HostMem::HostMem(InputArray arr, AllocType alloc_type_)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(alloc_type_)
{
arr.getMat().copyTo(*this);
}
inline
HostMem::~HostMem()
{
release();
}
inline
HostMem& HostMem::operator =(const HostMem& m)
{
if (this != &m)
{
HostMem temp(m);
swap(temp);
}
return *this;
}
inline
void HostMem::swap(HostMem& b)
{
std::swap(flags, b.flags);
std::swap(rows, b.rows);
std::swap(cols, b.cols);
std::swap(step, b.step);
std::swap(data, b.data);
std::swap(datastart, b.datastart);
std::swap(dataend, b.dataend);
std::swap(refcount, b.refcount);
std::swap(alloc_type, b.alloc_type);
}
inline
HostMem HostMem::clone() const
{
HostMem m(size(), type(), alloc_type);
createMatHeader().copyTo(m);
return m;
}
inline
void HostMem::create(Size size_, int type_)
{
create(size_.height, size_.width, type_);
}
inline
Mat HostMem::createMatHeader() const
{
return Mat(size(), type(), data, step);
}
inline
bool HostMem::isContinuous() const
{
return (flags & Mat::CONTINUOUS_FLAG) != 0;
}
inline
size_t HostMem::elemSize() const
{
return CV_ELEM_SIZE(flags);
}
inline
size_t HostMem::elemSize1() const
{
return CV_ELEM_SIZE1(flags);
}
inline
int HostMem::type() const
{
return CV_MAT_TYPE(flags);
}
inline
int HostMem::depth() const
{
return CV_MAT_DEPTH(flags);
}
inline
int HostMem::channels() const
{
return CV_MAT_CN(flags);
}
inline
size_t HostMem::step1() const
{
return step / elemSize1();
}
inline
Size HostMem::size() const
{
return Size(cols, rows);
}
inline
bool HostMem::empty() const
{
return data == 0;
}
static inline
void swap(HostMem& a, HostMem& b)
{
a.swap(b);
}
//===================================================================================
// Stream
//===================================================================================
inline
Stream::Stream(const Ptr<Impl>& impl)
: impl_(impl)
{
}
//===================================================================================
// Event
//===================================================================================
inline
Event::Event(const Ptr<Impl>& impl)
: impl_(impl)
{
}
//===================================================================================
// Initialization & Info
//===================================================================================
inline
bool TargetArchs::has(int major, int minor)
{
return hasPtx(major, minor) || hasBin(major, minor);
}
inline
bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
}
inline
DeviceInfo::DeviceInfo()
{
device_id_ = getDevice();
}
inline
DeviceInfo::DeviceInfo(int device_id)
{
CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
device_id_ = device_id;
}
inline
int DeviceInfo::deviceID() const
{
return device_id_;
}
inline
size_t DeviceInfo::freeMemory() const
{
size_t _totalMemory = 0, _freeMemory = 0;
queryMemory(_totalMemory, _freeMemory);
return _freeMemory;
}
inline
size_t DeviceInfo::totalMemory() const
{
size_t _totalMemory = 0, _freeMemory = 0;
queryMemory(_totalMemory, _freeMemory);
return _totalMemory;
}
inline
bool DeviceInfo::supports(FeatureSet feature_set) const
{
int version = majorVersion() * 10 + minorVersion();
return version >= feature_set;
}
}} // namespace cv { namespace cuda {
//===================================================================================
// Mat
//===================================================================================
namespace cv {
inline
Mat::Mat(const cuda::GpuMat& m)
: flags(0), dims(0), rows(0), cols(0), data(0), datastart(0), dataend(0), datalimit(0), allocator(0), u(0), size(&rows)
{
m.download(*this);
}
}
//! @endcond
#endif // OPENCV_CORE_CUDAINL_HPP

View File

@ -0,0 +1,211 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DEVICE_BLOCK_HPP
#define OPENCV_CUDA_DEVICE_BLOCK_HPP
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
struct Block
{
static __device__ __forceinline__ unsigned int id()
{
return blockIdx.x;
}
static __device__ __forceinline__ unsigned int stride()
{
return blockDim.x * blockDim.y * blockDim.z;
}
static __device__ __forceinline__ void sync()
{
__syncthreads();
}
static __device__ __forceinline__ int flattenedThreadId()
{
return threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
}
template<typename It, typename T>
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
{
int STRIDE = stride();
It t = beg + flattenedThreadId();
for(; t < end; t += STRIDE)
*t = value;
}
template<typename OutIt, typename T>
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
{
int STRIDE = stride();
int tid = flattenedThreadId();
value += tid;
for(OutIt t = beg + tid; t < end; t += STRIDE, value += STRIDE)
*t = value;
}
template<typename InIt, typename OutIt>
static __device__ __forceinline__ void copy(InIt beg, InIt end, OutIt out)
{
int STRIDE = stride();
InIt t = beg + flattenedThreadId();
OutIt o = out + (t - beg);
for(; t < end; t += STRIDE, o += STRIDE)
*o = *t;
}
template<typename InIt, typename OutIt, class UnOp>
static __device__ __forceinline__ void transform(InIt beg, InIt end, OutIt out, UnOp op)
{
int STRIDE = stride();
InIt t = beg + flattenedThreadId();
OutIt o = out + (t - beg);
for(; t < end; t += STRIDE, o += STRIDE)
*o = op(*t);
}
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
static __device__ __forceinline__ void transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
{
int STRIDE = stride();
InIt1 t1 = beg1 + flattenedThreadId();
InIt2 t2 = beg2 + flattenedThreadId();
OutIt o = out + (t1 - beg1);
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
*o = op(*t1, *t2);
}
template<int CTA_SIZE, typename T, class BinOp>
static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
{
int tid = flattenedThreadId();
T val = buffer[tid];
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
if (tid < 32)
{
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
}
}
template<int CTA_SIZE, typename T, class BinOp>
static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
{
int tid = flattenedThreadId();
T val = buffer[tid] = init;
__syncthreads();
if (CTA_SIZE >= 1024) { if (tid < 512) buffer[tid] = val = op(val, buffer[tid + 512]); __syncthreads(); }
if (CTA_SIZE >= 512) { if (tid < 256) buffer[tid] = val = op(val, buffer[tid + 256]); __syncthreads(); }
if (CTA_SIZE >= 256) { if (tid < 128) buffer[tid] = val = op(val, buffer[tid + 128]); __syncthreads(); }
if (CTA_SIZE >= 128) { if (tid < 64) buffer[tid] = val = op(val, buffer[tid + 64]); __syncthreads(); }
if (tid < 32)
{
if (CTA_SIZE >= 64) { buffer[tid] = val = op(val, buffer[tid + 32]); }
if (CTA_SIZE >= 32) { buffer[tid] = val = op(val, buffer[tid + 16]); }
if (CTA_SIZE >= 16) { buffer[tid] = val = op(val, buffer[tid + 8]); }
if (CTA_SIZE >= 8) { buffer[tid] = val = op(val, buffer[tid + 4]); }
if (CTA_SIZE >= 4) { buffer[tid] = val = op(val, buffer[tid + 2]); }
if (CTA_SIZE >= 2) { buffer[tid] = val = op(val, buffer[tid + 1]); }
}
__syncthreads();
return buffer[0];
}
template <typename T, class BinOp>
static __device__ __forceinline__ void reduce_n(T* data, unsigned int n, BinOp op)
{
int ftid = flattenedThreadId();
int sft = stride();
if (sft < n)
{
for (unsigned int i = sft + ftid; i < n; i += sft)
data[ftid] = op(data[ftid], data[i]);
__syncthreads();
n = sft;
}
while (n > 1)
{
unsigned int half = n/2;
if (ftid < half)
data[ftid] = op(data[ftid], data[n - ftid - 1]);
__syncthreads();
n = n - half;
}
}
};
}}}
//! @endcond
#endif /* OPENCV_CUDA_DEVICE_BLOCK_HPP */
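// Usage sketch (not part of the header above): a hypothetical kernel showing how
// the Block helpers combine -- each thread accumulates a partial sum over a
// grid-stride loop, the block stores partials to shared memory, then Block::reduce
// folds them in place. The kernel name, the block size of 256, and the include
// path are illustrative assumptions, not OpenCV API.
#include <opencv2/core/cuda/block.hpp>

struct PlusF
{
    __device__ __forceinline__ float operator()(float a, float b) const { return a + b; }
};

__global__ void blockSumSketch(const float* in, float* out, int n)
{
    using cv::cuda::device::Block;

    __shared__ float smem[256];                      // one slot per thread (CTA_SIZE == 256)

    const int tid = Block::flattenedThreadId();
    float acc = 0.f;
    for (int i = Block::id() * blockDim.x + tid; i < n; i += gridDim.x * Block::stride())
        acc += in[i];                                // grid-stride accumulation

    smem[tid] = acc;
    Block::sync();                                   // wraps __syncthreads()

    Block::reduce<256>(smem, PlusF());               // block-wide sum, result lands in smem[0]
    if (tid == 0)
        out[Block::id()] = smem[0];                  // one partial sum per block
}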

View File

@ -0,0 +1,722 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_BORDER_INTERPOLATE_HPP
#define OPENCV_CUDA_BORDER_INTERPOLATE_HPP
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
//////////////////////////////////////////////////////////////
// BrdConstant
template <typename D> struct BrdRowConstant
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return x >= 0 ? saturate_cast<D>(data[x]) : val;
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return x < width ? saturate_cast<D>(data[x]) : val;
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
}
int width;
D val;
};
template <typename D> struct BrdColConstant
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
int height;
D val;
};
template <typename D> struct BrdConstant
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) : height(height_), width(width_), val(val_)
{
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
}
int height;
int width;
D val;
};
//////////////////////////////////////////////////////////////
// BrdReplicate
template <typename D> struct BrdRowReplicate
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::max(x, 0);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::min(x, last_col);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
int last_col;
};
template <typename D> struct BrdColReplicate
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::max(y, 0);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::min(y, last_row);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
}
int last_row;
};
template <typename D> struct BrdReplicate
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdReplicate(int height, int width) : last_row(height - 1), last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::max(y, 0);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::min(y, last_row);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::max(x, 0);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::min(x, last_col);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
int last_row;
int last_col;
};
//////////////////////////////////////////////////////////////
// BrdReflect101
template <typename D> struct BrdRowReflect101
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::abs(x) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
int last_col;
};
template <typename D> struct BrdColReflect101
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::abs(y) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
int last_row;
};
template <typename D> struct BrdReflect101
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdReflect101(int height, int width) : last_row(height - 1), last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::abs(y) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::abs(x) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
int last_row;
int last_col;
};
//////////////////////////////////////////////////////////////
// BrdReflect
template <typename D> struct BrdRowReflect
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReflect(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (::abs(x) - (x < 0)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::abs(last_col - ::abs(last_col - x) + (x > last_col)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_high(::abs(x) - (x < 0));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
int last_col;
};
template <typename D> struct BrdColReflect
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColReflect(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReflect(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (::abs(y) - (y < 0)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::abs(last_row - ::abs(last_row - y) + (y > last_row)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_high(::abs(y) - (y < 0));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
int last_row;
};
template <typename D> struct BrdReflect
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdReflect(int height, int width) : last_row(height - 1), last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdReflect(int height, int width, U) : last_row(height - 1), last_col(width - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (::abs(y) - (y < 0)) % (last_row + 1);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return /*::abs*/(last_row - ::abs(last_row - y) + (y > last_row)) /*% (last_row + 1)*/;
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (::abs(x) - (x < 0)) % (last_col + 1);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return (last_col - ::abs(last_col - x) + (x > last_col));
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
int last_row;
int last_col;
};
//////////////////////////////////////////////////////////////
// BrdWrap
template <typename D> struct BrdRowWrap
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowWrap(int width_) : width(width_) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowWrap(int width_, U) : width(width_) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (x >= 0) * x + (x < 0) * (x - ((x - width + 1) / width) * width);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return (x < width) * x + (x >= width) * (x % width);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_high(idx_col_low(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
}
int width;
};
template <typename D> struct BrdColWrap
{
typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdColWrap(int height_) : height(height_) {}
template <typename U> __host__ __device__ __forceinline__ BrdColWrap(int height_, U) : height(height_) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (y >= 0) * y + (y < 0) * (y - ((y - height + 1) / height) * height);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return (y < height) * y + (y >= height) * (y % height);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_high(idx_row_low(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
}
int height;
};
template <typename D> struct BrdWrap
{
typedef D result_type;
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_) :
height(height_), width(width_)
{
}
template <typename U>
__host__ __device__ __forceinline__ BrdWrap(int height_, int width_, U) :
height(height_), width(width_)
{
}
__device__ __forceinline__ int idx_row_low(int y) const
{
return (y >= 0) ? y : (y - ((y - height + 1) / height) * height);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return (y < height) ? y : (y % height);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_high(idx_row_low(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return (x >= 0) ? x : (x - ((x - width + 1) / width) * width);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return (x < width) ? x : (x % width);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_high(idx_col_low(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
int height;
int width;
};
//////////////////////////////////////////////////////////////
// BorderReader
template <typename Ptr2D, typename B> struct BorderReader
{
typedef typename B::result_type elem_type;
typedef typename Ptr2D::index_type index_type;
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
{
return b.at(y, x, ptr);
}
Ptr2D ptr;
B b;
};
// under Win32 there is a bug with templated types that are passed as kernel parameters;
// with this specialization everything works fine
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
{
typedef typename BrdConstant<D>::result_type elem_type;
typedef typename Ptr2D::index_type index_type;
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) :
src(src_), height(b.height), width(b.width), val(b.val)
{
}
__device__ __forceinline__ D operator ()(index_type y, index_type x) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
}
Ptr2D src;
int height;
int width;
D val;
};
}}} // namespace cv { namespace cuda { namespace cudev
//! @endcond
#endif // OPENCV_CUDA_BORDER_INTERPOLATE_HPP
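// Usage sketch (not part of the header above): composing a border policy with
// BorderReader so a kernel can sample out-of-range coordinates safely. The shift
// kernel and its geometry are illustrative assumptions; PtrStep/PtrStepSz come
// from opencv2/core/cuda_types.hpp.
#include <opencv2/core/cuda_types.hpp>
#include <opencv2/core/cuda/border_interpolate.hpp>

__global__ void shiftSampleSketch(cv::cuda::PtrStepSz<uchar> src,
                                  cv::cuda::PtrStepSz<uchar> dst,
                                  int dx, int dy)
{
    using namespace cv::cuda::device;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= dst.cols || y >= dst.rows)
        return;

    // BrdReplicate clamps out-of-range indices to the nearest edge pixel;
    // any other Brd* policy above slots in the same way.
    BrdReplicate<uchar> brd(src.rows, src.cols);
    BorderReader< cv::cuda::PtrStep<uchar>, BrdReplicate<uchar> > reader(src, brd);

    dst(y, x) = reader(y + dy, x + dx);              // may land outside src; the policy decides
}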

View File

@ -0,0 +1,309 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_COLOR_HPP
#define OPENCV_CUDA_COLOR_HPP
#include "detail/color_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// All OPENCV_CUDA_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implement
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
// {
// typedef ... functor_type;
// static __host__ __device__ functor_type create_functor();
// };
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
#undef OPENCV_CUDA_IMPLEMENT_RGB2RGB5x5_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)
#undef OPENCV_CUDA_IMPLEMENT_GRAY2RGB5x5_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)
#undef OPENCV_CUDA_IMPLEMENT_RGB5x52GRAY_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2GRAY_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2YUV_TRAITS
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_YUV2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2YCrCb_TRAITS
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_YCrCb2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2XYZ_TRAITS
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_XYZ2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2HSV_TRAITS
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_HSV2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2HLS_TRAITS
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
#undef OPENCV_CUDA_IMPLEMENT_HLS2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgb_to_lab4, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(rgba_to_lab4, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgr_to_lab4, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(bgra_to_lab4, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgb_to_lab4, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lrgba_to_lab4, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgr_to_lab4, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS(lbgra_to_lab4, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2Lab_TRAITS
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgb, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgb, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_rgba, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_rgba, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgr, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgr, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_bgra, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_bgra, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgb, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgb, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lrgba, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lrgba, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgr, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgr, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab_to_lbgra, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS(lab4_to_lbgra, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_Lab2RGB_TRAITS
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgb_to_luv4, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(rgba_to_luv4, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgr_to_luv4, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(bgra_to_luv4, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgb_to_luv4, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lrgba_to_luv4, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgr_to_luv4, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS(lbgra_to_luv4, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_RGB2Luv_TRAITS
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgb, 3, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgb, 4, 3, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_rgba, 3, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_rgba, 4, 4, true, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgr, 3, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgr, 4, 3, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_bgra, 3, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_bgra, 4, 4, true, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgb, 3, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgb, 4, 3, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lrgba, 3, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lrgba, 4, 4, false, 2)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgr, 3, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgr, 4, 3, false, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv_to_lbgra, 3, 4, false, 0)
OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS(luv4_to_lbgra, 4, 4, false, 0)
#undef OPENCV_CUDA_IMPLEMENT_Luv2RGB_TRAITS
}}} // namespace cv { namespace cuda { namespace cudev
//! @endcond
#endif // OPENCV_CUDA_COLOR_HPP
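// Usage sketch (not part of the header above): every *_TRAITS macro expansion
// yields a name_traits<T> class whose create_functor() returns a per-pixel
// converter. This hypothetical kernel applies bgr_to_gray_traits one pixel at a
// time; real OpenCV code typically feeds the functor to a transform primitive.
#include <opencv2/core/cuda_types.hpp>
#include <opencv2/core/cuda/color.hpp>

__global__ void bgrToGraySketch(cv::cuda::PtrStepSz<uchar3> src,
                                cv::cuda::PtrStep<uchar> dst)
{
    using namespace cv::cuda::device;

    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x >= src.cols || y >= src.rows)
        return;

    // The traits fix the channel counts (3 -> 1) and the blue-channel index (0).
    bgr_to_gray_traits<uchar>::functor_type cvt = bgr_to_gray_traits<uchar>::create_functor();
    dst(y, x) = cvt(src(y, x));
}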

View File

@ -0,0 +1,109 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_COMMON_HPP
#define OPENCV_CUDA_COMMON_HPP
#include <cuda_runtime.h>
#include "opencv2/core/cuda_types.hpp"
#include "opencv2/core/cvdef.h"
#include "opencv2/core/base.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
#ifndef CV_PI_F
#ifndef CV_PI
#define CV_PI_F 3.14159265f
#else
#define CV_PI_F ((float)CV_PI)
#endif
#endif
namespace cv { namespace cuda {
static inline void checkCudaError(cudaError_t err, const char* file, const int line, const char* func)
{
if (cudaSuccess != err)
cv::error(cv::Error::GpuApiCallError, cudaGetErrorString(err), func, file, line);
}
}}
#ifndef cudaSafeCall
#define cudaSafeCall(expr) cv::cuda::checkCudaError(expr, __FILE__, __LINE__, CV_Func)
#endif
namespace cv { namespace cuda
{
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
{
return reinterpret_cast<size_t>(ptr) % size == 0;
}
static inline bool isAligned(size_t step, size_t size)
{
return step % size == 0;
}
}}
namespace cv { namespace cuda
{
namespace device
{
__host__ __device__ __forceinline__ int divUp(int total, int grain)
{
return (total + grain - 1) / grain;
}
template<class T> inline void bindTexture(const textureReference* tex, const PtrStepSz<T>& img)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
}
}
}}
//! @endcond
#endif // OPENCV_CUDA_COMMON_HPP
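// Usage sketch (not part of the header above): typical host-side launch
// boilerplate built from the helpers -- divUp rounds the grid size up, and
// cudaSafeCall converts CUDA error codes into cv::error exceptions. The fill
// kernel is an illustrative assumption.
#include <opencv2/core/cuda/common.hpp>

__global__ void fillSketchKernel(cv::cuda::PtrStepSzb img, uchar value)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < img.cols && y < img.rows)
        img(y, x) = value;
}

void launchFillSketch(cv::cuda::PtrStepSzb img, uchar value)
{
    const dim3 block(32, 8);
    const dim3 grid(cv::cuda::device::divUp(img.cols, block.x),
                    cv::cuda::device::divUp(img.rows, block.y));

    fillSketchKernel<<<grid, block>>>(img, value);

    cudaSafeCall( cudaGetLastError() );              // catch launch-configuration errors
    cudaSafeCall( cudaDeviceSynchronize() );         // surface asynchronous execution errors
}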

View File

@ -0,0 +1,113 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DATAMOV_UTILS_HPP
#define OPENCV_CUDA_DATAMOV_UTILS_HPP
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200
// from Fermi onwards, the memory space is detected automatically
template <typename T> struct ForceGlob
{
__device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
};
#else // __CUDA_ARCH__ >= 200
#if defined(_WIN64) || defined(__LP64__)
// 64-bit register modifier for inlined asm
#define OPENCV_CUDA_ASM_PTR "l"
#else
// 32-bit register modifier for inlined asm
#define OPENCV_CUDA_ASM_PTR "r"
#endif
template<class T> struct ForceGlob;
#define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
} \
};
#define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
template <> struct ForceGlob<base_type> \
{ \
__device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
{ \
asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \
} \
};
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar, u8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar, s8)
OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char, b8)
OPENCV_CUDA_DEFINE_FORCE_GLOB (ushort, u16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB (short, s16, h)
OPENCV_CUDA_DEFINE_FORCE_GLOB (uint, u32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB (int, s32, r)
OPENCV_CUDA_DEFINE_FORCE_GLOB (float, f32, f)
OPENCV_CUDA_DEFINE_FORCE_GLOB (double, f64, d)
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB
#undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B
#undef OPENCV_CUDA_ASM_PTR
#endif // __CUDA_ARCH__ >= 200
}}} // namespace cv { namespace cuda { namespace cudev
//! @endcond
#endif // OPENCV_CUDA_DATAMOV_UTILS_HPP
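// Usage sketch (not part of the header above): ForceGlob<T>::Load compiles to a
// plain array read on Fermi and newer, and to an explicit ld.global PTX
// instruction on older devices where the memory space cannot be inferred. The
// copy kernel is an illustrative assumption.
#include <opencv2/core/cuda/datamov_utils.hpp>

__global__ void copySketch(const float* src, float* dst, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        float v;
        cv::cuda::device::ForceGlob<float>::Load(src, i, v);   // v = src[i], forced global load
        dst[i] = v;
    }
}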

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,365 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_REDUCE_DETAIL_HPP
#define OPENCV_CUDA_REDUCE_DETAIL_HPP
#include <thrust/tuple.h>
#include "../warp.hpp"
#include "../warp_shuffle.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace reduce_detail
{
template <typename T> struct GetType;
template <typename T> struct GetType<T*>
{
typedef T type;
};
template <typename T> struct GetType<volatile T*>
{
typedef T type;
};
template <typename T> struct GetType<T&>
{
typedef T type;
};
template <unsigned int I, unsigned int N>
struct For
{
template <class PointerTuple, class ValTuple>
static __device__ void loadToSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
{
thrust::get<I>(smem)[tid] = thrust::get<I>(val);
For<I + 1, N>::loadToSmem(smem, val, tid);
}
template <class PointerTuple, class ValTuple>
static __device__ void loadFromSmem(const PointerTuple& smem, const ValTuple& val, unsigned int tid)
{
thrust::get<I>(val) = thrust::get<I>(smem)[tid];
For<I + 1, N>::loadFromSmem(smem, val, tid);
}
template <class PointerTuple, class ValTuple, class OpTuple>
static __device__ void merge(const PointerTuple& smem, const ValTuple& val, unsigned int tid, unsigned int delta, const OpTuple& op)
{
typename GetType<typename thrust::tuple_element<I, PointerTuple>::type>::type reg = thrust::get<I>(smem)[tid + delta];
thrust::get<I>(smem)[tid] = thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
For<I + 1, N>::merge(smem, val, tid, delta, op);
}
template <class ValTuple, class OpTuple>
static __device__ void mergeShfl(const ValTuple& val, unsigned int delta, unsigned int width, const OpTuple& op)
{
typename GetType<typename thrust::tuple_element<I, ValTuple>::type>::type reg = shfl_down(thrust::get<I>(val), delta, width);
thrust::get<I>(val) = thrust::get<I>(op)(thrust::get<I>(val), reg);
For<I + 1, N>::mergeShfl(val, delta, width, op);
}
};
template <unsigned int N>
struct For<N, N>
{
template <class PointerTuple, class ValTuple>
static __device__ void loadToSmem(const PointerTuple&, const ValTuple&, unsigned int)
{
}
template <class PointerTuple, class ValTuple>
static __device__ void loadFromSmem(const PointerTuple&, const ValTuple&, unsigned int)
{
}
template <class PointerTuple, class ValTuple, class OpTuple>
static __device__ void merge(const PointerTuple&, const ValTuple&, unsigned int, unsigned int, const OpTuple&)
{
}
template <class ValTuple, class OpTuple>
static __device__ void mergeShfl(const ValTuple&, unsigned int, unsigned int, const OpTuple&)
{
}
};
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& val, unsigned int tid)
{
smem[tid] = val;
}
template <typename T>
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& val, unsigned int tid)
{
val = smem[tid];
}
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
__device__ __forceinline__ void loadToSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int tid)
{
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadToSmem(smem, val, tid);
}
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9>
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int tid)
{
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::loadFromSmem(smem, val, tid);
}
template <typename T, class Op>
__device__ __forceinline__ void merge(volatile T* smem, T& val, unsigned int tid, unsigned int delta, const Op& op)
{
T reg = smem[tid + delta];
smem[tid] = val = op(val, reg);
}
template <typename T, class Op>
__device__ __forceinline__ void mergeShfl(T& val, unsigned int delta, unsigned int width, const Op& op)
{
T reg = shfl_down(val, delta, width);
val = op(val, reg);
}
template <typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void merge(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int tid,
unsigned int delta,
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
For<0, thrust::tuple_size<thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9> >::value>::merge(smem, val, tid, delta, op);
}
template <typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void mergeShfl(const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int delta,
unsigned int width,
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
For<0, thrust::tuple_size<thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9> >::value>::mergeShfl(val, delta, width, op);
}
template <unsigned int N> struct Generic
{
template <typename Pointer, typename Reference, class Op>
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
loadToSmem(smem, val, tid);
if (N >= 32)
__syncthreads();
if (N >= 2048)
{
if (tid < 1024)
merge(smem, val, tid, 1024, op);
__syncthreads();
}
if (N >= 1024)
{
if (tid < 512)
merge(smem, val, tid, 512, op);
__syncthreads();
}
if (N >= 512)
{
if (tid < 256)
merge(smem, val, tid, 256, op);
__syncthreads();
}
if (N >= 256)
{
if (tid < 128)
merge(smem, val, tid, 128, op);
__syncthreads();
}
if (N >= 128)
{
if (tid < 64)
merge(smem, val, tid, 64, op);
__syncthreads();
}
if (N >= 64)
{
if (tid < 32)
merge(smem, val, tid, 32, op);
}
if (tid < 16)
{
merge(smem, val, tid, 16, op);
merge(smem, val, tid, 8, op);
merge(smem, val, tid, 4, op);
merge(smem, val, tid, 2, op);
merge(smem, val, tid, 1, op);
}
}
};
template <unsigned int I, typename Pointer, typename Reference, class Op>
struct Unroll
{
static __device__ void loopShfl(Reference val, Op op, unsigned int N)
{
mergeShfl(val, I, N, op);
Unroll<I / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
}
static __device__ void loop(Pointer smem, Reference val, unsigned int tid, Op op)
{
merge(smem, val, tid, I, op);
Unroll<I / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
}
};
template <typename Pointer, typename Reference, class Op>
struct Unroll<0, Pointer, Reference, Op>
{
static __device__ void loopShfl(Reference, Op, unsigned int)
{
}
static __device__ void loop(Pointer, Reference, unsigned int, Op)
{
}
};
template <unsigned int N> struct WarpOptimized
{
template <typename Pointer, typename Reference, class Op>
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
(void) smem;
(void) tid;
Unroll<N / 2, Pointer, Reference, Op>::loopShfl(val, op, N);
#else
loadToSmem(smem, val, tid);
if (tid < N / 2)
Unroll<N / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
#endif
}
};
template <unsigned int N> struct GenericOptimized32
{
enum { M = N / 32 };
template <typename Pointer, typename Reference, class Op>
static __device__ void reduce(Pointer smem, Reference val, unsigned int tid, Op op)
{
const unsigned int laneId = Warp::laneId();
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
Unroll<16, Pointer, Reference, Op>::loopShfl(val, op, warpSize);
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#else
loadToSmem(smem, val, tid);
if (laneId < 16)
Unroll<16, Pointer, Reference, Op>::loop(smem, val, tid, op);
__syncthreads();
if (laneId == 0)
loadToSmem(smem, val, tid / 32);
#endif
__syncthreads();
loadFromSmem(smem, val, tid);
if (tid < 32)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
Unroll<M / 2, Pointer, Reference, Op>::loopShfl(val, op, M);
#else
Unroll<M / 2, Pointer, Reference, Op>::loop(smem, val, tid, op);
#endif
}
}
};
template <bool val, class T1, class T2> struct StaticIf;
template <class T1, class T2> struct StaticIf<true, T1, T2>
{
typedef T1 type;
};
template <class T1, class T2> struct StaticIf<false, T1, T2>
{
typedef T2 type;
};
template <unsigned int N> struct IsPowerOf2
{
enum { value = ((N != 0) && !(N & (N - 1))) };
};
template <unsigned int N> struct Dispatcher
{
typedef typename StaticIf<
(N <= 32) && IsPowerOf2<N>::value,
WarpOptimized<N>,
typename StaticIf<
(N <= 1024) && IsPowerOf2<N>::value,
GenericOptimized32<N>,
Generic<N>
>::type
>::type reductor;
};
}
}}}
//! @endcond
#endif // OPENCV_CUDA_REDUCE_DETAIL_HPP
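// Usage sketch (not part of the header above): Dispatcher picks a reduction
// strategy from the block size at compile time (warp-shuffle, 32-wide optimized,
// or generic). It is normally driven through the reduce<N>() wrapper in
// reduce.hpp (not shown in this commit), which forwards with explicit reference
// template arguments; this hypothetical kernel reproduces that call directly.
#include <cfloat>
#include <opencv2/core/cuda/detail/reduce.hpp>

struct MaxOp
{
    __device__ __forceinline__ float operator()(float a, float b) const { return a < b ? b : a; }
};

__global__ void blockMaxSketch(const float* in, float* out, int n)
{
    const int N = 256;                               // threads per block; must match the launch
    __shared__ float smem[N];

    const int tid = threadIdx.x;
    float val = -FLT_MAX;
    for (int i = blockIdx.x * N + tid; i < n; i += gridDim.x * N)
        val = val < in[i] ? in[i] : val;

    using namespace cv::cuda::device;
    // The float& template argument is what lets the reductor write the merged
    // result back into this thread's val.
    reduce_detail::Dispatcher<N>::reductor::reduce<volatile float*, float&, const MaxOp&>(
        smem, val, tid, MaxOp());

    if (tid == 0)
        out[blockIdx.x] = val;                       // block-wide maximum
}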

View File

@ -0,0 +1,502 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
#define OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
#include <thrust/tuple.h>
#include "../warp.hpp"
#include "../warp_shuffle.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace reduce_key_val_detail
{
template <typename T> struct GetType;
template <typename T> struct GetType<T*>
{
typedef T type;
};
template <typename T> struct GetType<volatile T*>
{
typedef T type;
};
template <typename T> struct GetType<T&>
{
typedef T type;
};
template <unsigned int I, unsigned int N>
struct For
{
template <class PointerTuple, class ReferenceTuple>
static __device__ void loadToSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
{
thrust::get<I>(smem)[tid] = thrust::get<I>(data);
For<I + 1, N>::loadToSmem(smem, data, tid);
}
template <class PointerTuple, class ReferenceTuple>
static __device__ void loadFromSmem(const PointerTuple& smem, const ReferenceTuple& data, unsigned int tid)
{
thrust::get<I>(data) = thrust::get<I>(smem)[tid];
For<I + 1, N>::loadFromSmem(smem, data, tid);
}
template <class ReferenceTuple>
static __device__ void copyShfl(const ReferenceTuple& val, unsigned int delta, int width)
{
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
For<I + 1, N>::copyShfl(val, delta, width);
}
template <class PointerTuple, class ReferenceTuple>
static __device__ void copy(const PointerTuple& svals, const ReferenceTuple& val, unsigned int tid, unsigned int delta)
{
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
For<I + 1, N>::copy(svals, val, tid, delta);
}
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
static __device__ void mergeShfl(const KeyReferenceTuple& key, const ValReferenceTuple& val, const CmpTuple& cmp, unsigned int delta, int width)
{
typename GetType<typename thrust::tuple_element<I, KeyReferenceTuple>::type>::type reg = shfl_down(thrust::get<I>(key), delta, width);
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
{
thrust::get<I>(key) = reg;
thrust::get<I>(val) = shfl_down(thrust::get<I>(val), delta, width);
}
For<I + 1, N>::mergeShfl(key, val, cmp, delta, width);
}
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
static __device__ void merge(const KeyPointerTuple& skeys, const KeyReferenceTuple& key,
const ValPointerTuple& svals, const ValReferenceTuple& val,
const CmpTuple& cmp,
unsigned int tid, unsigned int delta)
{
typename GetType<typename thrust::tuple_element<I, KeyPointerTuple>::type>::type reg = thrust::get<I>(skeys)[tid + delta];
if (thrust::get<I>(cmp)(reg, thrust::get<I>(key)))
{
thrust::get<I>(skeys)[tid] = thrust::get<I>(key) = reg;
thrust::get<I>(svals)[tid] = thrust::get<I>(val) = thrust::get<I>(svals)[tid + delta];
}
For<I + 1, N>::merge(skeys, key, svals, val, cmp, tid, delta);
}
};
template <unsigned int N>
struct For<N, N>
{
template <class PointerTuple, class ReferenceTuple>
static __device__ void loadToSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
{
}
template <class PointerTuple, class ReferenceTuple>
static __device__ void loadFromSmem(const PointerTuple&, const ReferenceTuple&, unsigned int)
{
}
template <class ReferenceTuple>
static __device__ void copyShfl(const ReferenceTuple&, unsigned int, int)
{
}
template <class PointerTuple, class ReferenceTuple>
static __device__ void copy(const PointerTuple&, const ReferenceTuple&, unsigned int, unsigned int)
{
}
template <class KeyReferenceTuple, class ValReferenceTuple, class CmpTuple>
static __device__ void mergeShfl(const KeyReferenceTuple&, const ValReferenceTuple&, const CmpTuple&, unsigned int, int)
{
}
template <class KeyPointerTuple, class KeyReferenceTuple, class ValPointerTuple, class ValReferenceTuple, class CmpTuple>
static __device__ void merge(const KeyPointerTuple&, const KeyReferenceTuple&,
const ValPointerTuple&, const ValReferenceTuple&,
const CmpTuple&,
unsigned int, unsigned int)
{
}
};
//////////////////////////////////////////////////////
// loadToSmem
template <typename T>
__device__ __forceinline__ void loadToSmem(volatile T* smem, T& data, unsigned int tid)
{
smem[tid] = data;
}
template <typename T>
__device__ __forceinline__ void loadFromSmem(volatile T* smem, T& data, unsigned int tid)
{
data = smem[tid];
}
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void loadToSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
unsigned int tid)
{
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadToSmem(smem, data, tid);
}
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void loadFromSmem(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& smem,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& data,
unsigned int tid)
{
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::loadFromSmem(smem, data, tid);
}
//////////////////////////////////////////////////////
// copyVals
template <typename V>
__device__ __forceinline__ void copyValsShfl(V& val, unsigned int delta, int width)
{
val = shfl_down(val, delta, width);
}
template <typename V>
__device__ __forceinline__ void copyVals(volatile V* svals, V& val, unsigned int tid, unsigned int delta)
{
svals[tid] = val = svals[tid + delta];
}
template <typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void copyValsShfl(const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
unsigned int delta,
int width)
{
For<0, thrust::tuple_size<thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9> >::value>::copyShfl(val, delta, width);
}
template <typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9>
__device__ __forceinline__ void copyVals(const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
unsigned int tid, unsigned int delta)
{
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::copy(svals, val, tid, delta);
}
//////////////////////////////////////////////////////
// merge
template <typename K, typename V, class Cmp>
__device__ __forceinline__ void mergeShfl(K& key, V& val, const Cmp& cmp, unsigned int delta, int width)
{
K reg = shfl_down(key, delta, width);
if (cmp(reg, key))
{
key = reg;
copyValsShfl(val, delta, width);
}
}
template <typename K, typename V, class Cmp>
__device__ __forceinline__ void merge(volatile K* skeys, K& key, volatile V* svals, V& val, const Cmp& cmp, unsigned int tid, unsigned int delta)
{
K reg = skeys[tid + delta];
if (cmp(reg, key))
{
skeys[tid] = key = reg;
copyVals(svals, val, tid, delta);
}
}
template <typename K,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp>
__device__ __forceinline__ void mergeShfl(K& key,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
const Cmp& cmp,
unsigned int delta, int width)
{
K reg = shfl_down(key, delta, width);
if (cmp(reg, key))
{
key = reg;
copyValsShfl(val, delta, width);
}
}
template <typename K,
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp>
__device__ __forceinline__ void merge(volatile K* skeys, K& key,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
const Cmp& cmp, unsigned int tid, unsigned int delta)
{
K reg = skeys[tid + delta];
if (cmp(reg, key))
{
skeys[tid] = key = reg;
copyVals(svals, val, tid, delta);
}
}
template <typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void mergeShfl(const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
unsigned int delta, int width)
{
For<0, thrust::tuple_size<thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9> >::value>::mergeShfl(key, val, cmp, delta, width);
}
template <typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void merge(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp,
unsigned int tid, unsigned int delta)
{
For<0, thrust::tuple_size<thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9> >::value>::merge(skeys, key, svals, val, cmp, tid, delta);
}
//////////////////////////////////////////////////////
// Generic
template <unsigned int N> struct Generic
{
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (N >= 32)
__syncthreads();
if (N >= 2048)
{
if (tid < 1024)
merge(skeys, key, svals, val, cmp, tid, 1024);
__syncthreads();
}
if (N >= 1024)
{
if (tid < 512)
merge(skeys, key, svals, val, cmp, tid, 512);
__syncthreads();
}
if (N >= 512)
{
if (tid < 256)
merge(skeys, key, svals, val, cmp, tid, 256);
__syncthreads();
}
if (N >= 256)
{
if (tid < 128)
merge(skeys, key, svals, val, cmp, tid, 128);
__syncthreads();
}
if (N >= 128)
{
if (tid < 64)
merge(skeys, key, svals, val, cmp, tid, 64);
__syncthreads();
}
if (N >= 64)
{
if (tid < 32)
merge(skeys, key, svals, val, cmp, tid, 32);
}
if (tid < 16)
{
merge(skeys, key, svals, val, cmp, tid, 16);
merge(skeys, key, svals, val, cmp, tid, 8);
merge(skeys, key, svals, val, cmp, tid, 4);
merge(skeys, key, svals, val, cmp, tid, 2);
merge(skeys, key, svals, val, cmp, tid, 1);
}
}
};
template <unsigned int I, class KP, class KR, class VP, class VR, class Cmp>
struct Unroll
{
static __device__ void loopShfl(KR key, VR val, Cmp cmp, unsigned int N)
{
mergeShfl(key, val, cmp, I, N);
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
}
static __device__ void loop(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
merge(skeys, key, svals, val, cmp, tid, I);
Unroll<I / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
}
};
template <class KP, class KR, class VP, class VR, class Cmp>
struct Unroll<0, KP, KR, VP, VR, Cmp>
{
static __device__ void loopShfl(KR, VR, Cmp, unsigned int)
{
}
static __device__ void loop(KP, KR, VP, VR, unsigned int, Cmp)
{
}
};
template <unsigned int N> struct WarpOptimized
{
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
#if 0 // __CUDA_ARCH__ >= 300
(void) skeys;
(void) svals;
(void) tid;
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, N);
#else
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (tid < N / 2)
Unroll<N / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
}
};
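// GenericOptimized32 (key/value flavour): per-warp reduction first, lane 0
// of each warp stores its winning key/value pair to shared memory, then the
// first warp reduces the M = N / 32 per-warp winners.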
template <unsigned int N> struct GenericOptimized32
{
enum { M = N / 32 };
template <class KP, class KR, class VP, class VR, class Cmp>
static __device__ void reduce(KP skeys, KR key, VP svals, VR val, unsigned int tid, Cmp cmp)
{
const unsigned int laneId = Warp::laneId();
#if 0 // __CUDA_ARCH__ >= 300
Unroll<16, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, warpSize);
if (laneId == 0)
{
loadToSmem(skeys, key, tid / 32);
loadToSmem(svals, val, tid / 32);
}
#else
loadToSmem(skeys, key, tid);
loadToSmem(svals, val, tid);
if (laneId < 16)
Unroll<16, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
__syncthreads();
if (laneId == 0)
{
loadToSmem(skeys, key, tid / 32);
loadToSmem(svals, val, tid / 32);
}
#endif
__syncthreads();
loadFromSmem(skeys, key, tid);
if (tid < 32)
{
#if 0 // __CUDA_ARCH__ >= 300
loadFromSmem(svals, val, tid);
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loopShfl(key, val, cmp, M);
#else
Unroll<M / 2, KP, KR, VP, VR, Cmp>::loop(skeys, key, svals, val, tid, cmp);
#endif
}
}
};
template <bool val, class T1, class T2> struct StaticIf;
template <class T1, class T2> struct StaticIf<true, T1, T2>
{
typedef T1 type;
};
template <class T1, class T2> struct StaticIf<false, T1, T2>
{
typedef T2 type;
};
template <unsigned int N> struct IsPowerOf2
{
enum { value = ((N != 0) && !(N & (N - 1))) };
};
template <unsigned int N> struct Dispatcher
{
typedef typename StaticIf<
(N <= 32) && IsPowerOf2<N>::value,
WarpOptimized<N>,
typename StaticIf<
(N <= 1024) && IsPowerOf2<N>::value,
GenericOptimized32<N>,
Generic<N>
>::type
>::type reductor;
};
}
}}}
//! @endcond
#endif // OPENCV_CUDA_PRED_VAL_REDUCE_DETAIL_HPP
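A hedged usage sketch of the key/value reduction above: a block-wide argmin that carries each element's index along with the minimized key. LessOp and blockArgMin are illustrative names; as with the plain reduction, the reference template arguments must be spelled out so key and val are updated in place.

struct LessOp
{
    __device__ __forceinline__ bool operator ()(float a, float b) const { return a < b; }
};

template <unsigned int N>
__global__ void blockArgMin(const float* in, float* minVal, int* minIdx)
{
    __shared__ float skeys[N];
    __shared__ int   svals[N];
    const unsigned int tid = threadIdx.x;
    float key = in[blockIdx.x * N + tid]; // value being minimized
    int   val = blockIdx.x * N + tid;     // payload: the element's index
    typedef typename cv::cuda::device::reduce_key_val_detail::Dispatcher<N>::reductor Reductor;
    Reductor::template reduce<volatile float*, float&, volatile int*, int&, const LessOp&>(
        skeys, key, svals, val, tid, LessOp());
    if (tid == 0)
    {
        minVal[blockIdx.x] = key; // smallest value in the block
        minIdx[blockIdx.x] = val; // index it came from
    }
}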

View File

@ -0,0 +1,399 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_TRANSFORM_DETAIL_HPP
#define OPENCV_CUDA_TRANSFORM_DETAIL_HPP
#include "../common.hpp"
#include "../vec_traits.hpp"
#include "../functional.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace transform_detail
{
//! Read Write Traits
template <typename T, typename D, int shift> struct UnaryReadWriteTraits
{
typedef typename TypeVec<T, shift>::vec_type read_type;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
template <typename T1, typename T2, typename D, int shift> struct BinaryReadWriteTraits
{
typedef typename TypeVec<T1, shift>::vec_type read_type1;
typedef typename TypeVec<T2, shift>::vec_type read_type2;
typedef typename TypeVec<D, shift>::vec_type write_type;
};
//! Transform kernels
template <int shift> struct OpUnroller;
template <> struct OpUnroller<1>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
}
};
template <> struct OpUnroller<2>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
}
};
template <> struct OpUnroller<3>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
}
};
template <> struct OpUnroller<4>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src.x);
if (mask(y, x_shifted + 1))
dst.y = op(src.y);
if (mask(y, x_shifted + 2))
dst.z = op(src.z);
if (mask(y, x_shifted + 3))
dst.w = op(src.w);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.x = op(src1.x, src2.x);
if (mask(y, x_shifted + 1))
dst.y = op(src1.y, src2.y);
if (mask(y, x_shifted + 2))
dst.z = op(src1.z, src2.z);
if (mask(y, x_shifted + 3))
dst.w = op(src1.w, src2.w);
}
};
template <> struct OpUnroller<8>
{
template <typename T, typename D, typename UnOp, typename Mask>
static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, const UnOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src.a7);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, const BinOp& op, int x_shifted, int y)
{
if (mask(y, x_shifted))
dst.a0 = op(src1.a0, src2.a0);
if (mask(y, x_shifted + 1))
dst.a1 = op(src1.a1, src2.a1);
if (mask(y, x_shifted + 2))
dst.a2 = op(src1.a2, src2.a2);
if (mask(y, x_shifted + 3))
dst.a3 = op(src1.a3, src2.a3);
if (mask(y, x_shifted + 4))
dst.a4 = op(src1.a4, src2.a4);
if (mask(y, x_shifted + 5))
dst.a5 = op(src1.a5, src2.a5);
if (mask(y, x_shifted + 6))
dst.a6 = op(src1.a6, src2.a6);
if (mask(y, x_shifted + 7))
dst.a7 = op(src1.a7, src2.a7);
}
};
template <typename T, typename D, typename UnOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T> src_, PtrStep<D> dst_, const Mask mask, const UnOp op)
{
typedef TransformFunctorTraits<UnOp> ft;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::read_type read_type;
typedef typename UnaryReadWriteTraits<T, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;
if (y < src_.rows)
{
const T* src = src_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src_.cols)
{
const read_type src_n_el = ((const read_type*)src)[x];
write_type dst_n_el = ((const write_type*)dst)[x];
OpUnroller<ft::smart_shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src[real_x]);
}
}
}
}
template <typename T, typename D, typename UnOp, typename Mask>
__global__ static void transformSimple(const PtrStepSz<T> src, PtrStep<D> dst, const Mask mask, const UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src.cols && y < src.rows && mask(y, x))
{
dst.ptr(y)[x] = op(src.ptr(y)[x]);
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSmart(const PtrStepSz<T1> src1_, const PtrStep<T2> src2_, PtrStep<D> dst_,
const Mask mask, const BinOp op)
{
typedef TransformFunctorTraits<BinOp> ft;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type1 read_type1;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::read_type2 read_type2;
typedef typename BinaryReadWriteTraits<T1, T2, D, ft::smart_shift>::write_type write_type;
const int x = threadIdx.x + blockIdx.x * blockDim.x;
const int y = threadIdx.y + blockIdx.y * blockDim.y;
const int x_shifted = x * ft::smart_shift;
if (y < src1_.rows)
{
const T1* src1 = src1_.ptr(y);
const T2* src2 = src2_.ptr(y);
D* dst = dst_.ptr(y);
if (x_shifted + ft::smart_shift - 1 < src1_.cols)
{
const read_type1 src1_n_el = ((const read_type1*)src1)[x];
const read_type2 src2_n_el = ((const read_type2*)src2)[x];
write_type dst_n_el = ((const write_type*)dst)[x];
OpUnroller<ft::smart_shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);
((write_type*)dst)[x] = dst_n_el;
}
else
{
for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
{
if (mask(y, real_x))
dst[real_x] = op(src1[real_x], src2[real_x]);
}
}
}
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static __global__ void transformSimple(const PtrStepSz<T1> src1, const PtrStep<T2> src2, PtrStep<D> dst,
const Mask mask, const BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < src1.cols && y < src1.rows && mask(y, x))
{
const T1 src1_data = src1.ptr(y)[x];
const T2 src2_data = src2.ptr(y)[x];
dst.ptr(y)[x] = op(src1_data, src2_data);
}
}
template <bool UseSmart> struct TransformDispatcher;
template<> struct TransformDispatcher<false>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y), 1);
transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
const dim3 threads(ft::simple_block_dim_x, ft::simple_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x), divUp(src1.rows, threads.y), 1);
transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<> struct TransformDispatcher<true>
{
template <typename T, typename D, typename UnOp, typename Mask>
static void call(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(src.step, ft::smart_shift * sizeof(T)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static void call(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, Mask mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
CV_StaticAssert(ft::smart_shift != 1, "");
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src1.step, ft::smart_shift * sizeof(T1)) ||
!isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(src2.step, ft::smart_shift * sizeof(T2)) ||
!isAligned(dst.data, ft::smart_shift * sizeof(D)) || !isAligned(dst.step, ft::smart_shift * sizeof(D)))
{
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
return;
}
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
} // namespace transform_detail
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_TRANSFORM_DETAIL_HPP
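A host-side sketch of driving TransformDispatcher directly (illustrative; Scale2, NoMask and doublePixels are not part of this header, and real callers normally go through the transform() wrappers instead). It assumes the default TransformFunctorTraits from functional.hpp applies, whose smart_shift of 1 is exactly what rules out the vectorized path.

struct Scale2
{
    __device__ __forceinline__ uchar operator ()(uchar v) const
    {
        const int r = 2 * v;
        return (uchar)(r > 255 ? 255 : r); // saturating doubling
    }
};

struct NoMask
{
    __device__ __forceinline__ bool operator ()(int, int) const { return true; }
};

void doublePixels(cv::cuda::PtrStepSz<uchar> src, cv::cuda::PtrStepSz<uchar> dst, cudaStream_t stream)
{
    using namespace cv::cuda::device::transform_detail;
    // smart_shift == 1 for the default traits, so take the simple per-pixel path.
    TransformDispatcher<false>::call(src, dst, Scale2(), NoMask(), stream);
}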

View File

@ -0,0 +1,191 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
#define OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
#include "../common.hpp"
#include "../vec_traits.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace type_traits_detail
{
template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
template <typename T1, typename T2> struct Select<false, T1, T2> { typedef T2 type; };
template <typename T> struct IsSignedIntergral { enum {value = 0}; };
template <> struct IsSignedIntergral<schar> { enum {value = 1}; };
template <> struct IsSignedIntergral<char1> { enum {value = 1}; };
template <> struct IsSignedIntergral<short> { enum {value = 1}; };
template <> struct IsSignedIntergral<short1> { enum {value = 1}; };
template <> struct IsSignedIntergral<int> { enum {value = 1}; };
template <> struct IsSignedIntergral<int1> { enum {value = 1}; };
template <typename T> struct IsUnsignedIntegral { enum {value = 0}; };
template <> struct IsUnsignedIntegral<uchar> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uchar1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<ushort1> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint> { enum {value = 1}; };
template <> struct IsUnsignedIntegral<uint1> { enum {value = 1}; };
template <typename T> struct IsIntegral { enum {value = IsSignedIntergral<T>::value || IsUnsignedIntegral<T>::value}; };
template <> struct IsIntegral<char> { enum {value = 1}; };
template <> struct IsIntegral<bool> { enum {value = 1}; };
template <typename T> struct IsFloat { enum {value = 0}; };
template <> struct IsFloat<float> { enum {value = 1}; };
template <> struct IsFloat<double> { enum {value = 1}; };
template <typename T> struct IsVec { enum {value = 0}; };
template <> struct IsVec<uchar1> { enum {value = 1}; };
template <> struct IsVec<uchar2> { enum {value = 1}; };
template <> struct IsVec<uchar3> { enum {value = 1}; };
template <> struct IsVec<uchar4> { enum {value = 1}; };
template <> struct IsVec<uchar8> { enum {value = 1}; };
template <> struct IsVec<char1> { enum {value = 1}; };
template <> struct IsVec<char2> { enum {value = 1}; };
template <> struct IsVec<char3> { enum {value = 1}; };
template <> struct IsVec<char4> { enum {value = 1}; };
template <> struct IsVec<char8> { enum {value = 1}; };
template <> struct IsVec<ushort1> { enum {value = 1}; };
template <> struct IsVec<ushort2> { enum {value = 1}; };
template <> struct IsVec<ushort3> { enum {value = 1}; };
template <> struct IsVec<ushort4> { enum {value = 1}; };
template <> struct IsVec<ushort8> { enum {value = 1}; };
template <> struct IsVec<short1> { enum {value = 1}; };
template <> struct IsVec<short2> { enum {value = 1}; };
template <> struct IsVec<short3> { enum {value = 1}; };
template <> struct IsVec<short4> { enum {value = 1}; };
template <> struct IsVec<short8> { enum {value = 1}; };
template <> struct IsVec<uint1> { enum {value = 1}; };
template <> struct IsVec<uint2> { enum {value = 1}; };
template <> struct IsVec<uint3> { enum {value = 1}; };
template <> struct IsVec<uint4> { enum {value = 1}; };
template <> struct IsVec<uint8> { enum {value = 1}; };
template <> struct IsVec<int1> { enum {value = 1}; };
template <> struct IsVec<int2> { enum {value = 1}; };
template <> struct IsVec<int3> { enum {value = 1}; };
template <> struct IsVec<int4> { enum {value = 1}; };
template <> struct IsVec<int8> { enum {value = 1}; };
template <> struct IsVec<float1> { enum {value = 1}; };
template <> struct IsVec<float2> { enum {value = 1}; };
template <> struct IsVec<float3> { enum {value = 1}; };
template <> struct IsVec<float4> { enum {value = 1}; };
template <> struct IsVec<float8> { enum {value = 1}; };
template <> struct IsVec<double1> { enum {value = 1}; };
template <> struct IsVec<double2> { enum {value = 1}; };
template <> struct IsVec<double3> { enum {value = 1}; };
template <> struct IsVec<double4> { enum {value = 1}; };
template <> struct IsVec<double8> { enum {value = 1}; };
template <class U> struct AddParameterType { typedef const U& type; };
template <class U> struct AddParameterType<U&> { typedef U& type; };
template <> struct AddParameterType<void> { typedef void type; };
template <class U> struct ReferenceTraits
{
enum { value = false };
typedef U type;
};
template <class U> struct ReferenceTraits<U&>
{
enum { value = true };
typedef U type;
};
template <class U> struct PointerTraits
{
enum { value = false };
typedef void type;
};
template <class U> struct PointerTraits<U*>
{
enum { value = true };
typedef U type;
};
template <class U> struct PointerTraits<U*&>
{
enum { value = true };
typedef U type;
};
template <class U> struct UnConst
{
typedef U type;
enum { value = 0 };
};
template <class U> struct UnConst<const U>
{
typedef U type;
enum { value = 1 };
};
template <class U> struct UnConst<const U&>
{
typedef U& type;
enum { value = 1 };
};
template <class U> struct UnVolatile
{
typedef U type;
enum { value = 0 };
};
template <class U> struct UnVolatile<volatile U>
{
typedef U type;
enum { value = 1 };
};
template <class U> struct UnVolatile<volatile U&>
{
typedef U& type;
enum { value = 1 };
};
} // namespace type_traits_detail
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_TYPE_TRAITS_DETAIL_HPP
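A small compile-time sketch (illustrative, not part of the header) of the traits above; CV_StaticAssert is the assertion macro these headers already use elsewhere.

namespace ttd = cv::cuda::device::type_traits_detail;

// Select picks between two types on a boolean; the Is* predicates classify
// scalar and vector types.
typedef ttd::Select<ttd::IsIntegral<short>::value, int, float>::type short_acc_t; // -> int
typedef ttd::Select<ttd::IsFloat<double>::value, double, int>::type  dbl_acc_t;   // -> double

CV_StaticAssert(ttd::IsUnsignedIntegral<uint>::value == 1, "");
CV_StaticAssert(ttd::IsVec<uchar3>::value == 1, "");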

View File

@ -0,0 +1,121 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
#define OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
#include "../datamov_utils.hpp"
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
namespace vec_distance_detail
{
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
{
template <typename Dist, typename T1, typename T2>
static __device__ void calcCheck(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int ind)
{
if (ind < len)
{
T1 val1 = *vecCached++;
T2 val2;
ForceGlob<T2>::Load(vecGlob, ind, val2);
dist.reduceIter(val1, val2);
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcCheck(vecCached, vecGlob, len, dist, ind + THREAD_DIM);
}
}
template <typename Dist, typename T1, typename T2>
static __device__ void calcWithoutCheck(const T1* vecCached, const T2* vecGlob, Dist& dist)
{
T1 val1 = *vecCached++;
T2 val2;
ForceGlob<T2>::Load(vecGlob, 0, val2);
vecGlob += THREAD_DIM;
dist.reduceIter(val1, val2);
UnrollVecDiffCached<THREAD_DIM, N - 1>::calcWithoutCheck(vecCached, vecGlob, dist);
}
};
template <int THREAD_DIM> struct UnrollVecDiffCached<THREAD_DIM, 0>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calcCheck(const T1*, const T2*, int, Dist&, int)
{
}
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calcWithoutCheck(const T1*, const T2*, Dist&)
{
}
};
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN> struct VecDiffCachedCalculator;
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
{
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcCheck(vecCached, vecGlob, len, dist, tid);
}
};
template <int THREAD_DIM, int MAX_LEN> struct VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, true>
{
template <typename Dist, typename T1, typename T2>
static __device__ __forceinline__ void calc(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, int tid)
{
UnrollVecDiffCached<THREAD_DIM, MAX_LEN / THREAD_DIM>::calcWithoutCheck(vecCached, vecGlob + tid, dist);
}
};
} // namespace vec_distance_detail
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_VEC_DISTANCE_DETAIL_HPP
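A hedged device-side sketch of driving the unrolled loop above (L2Partial and partialL2 are illustrative): each of THREAD_DIM cooperating threads holds its strided slice of the query vector (elements tid, tid + THREAD_DIM, ...) in a private cache and accumulates a partial squared-L2 distance against a vector in global memory; the per-thread partials still need a cross-thread reduction afterwards.

struct L2Partial
{
    float sum;
    __device__ __forceinline__ L2Partial() : sum(0.f) {}
    __device__ __forceinline__ void reduceIter(float a, float b)
    {
        const float d = a - b;
        sum += d * d;
    }
};

template <int THREAD_DIM, int MAX_LEN>
__device__ float partialL2(const float* cached, const float* vecGlob, int len, int tid)
{
    using namespace cv::cuda::device::vec_distance_detail;
    L2Partial dist;
    // LEN_EQ_MAX_LEN == false: every access is bounds-checked against len.
    VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, false>::calc(cached, vecGlob, len, dist, tid);
    return dist.sum;
}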

View File

@ -0,0 +1,88 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DYNAMIC_SMEM_HPP
#define OPENCV_CUDA_DYNAMIC_SMEM_HPP
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template<class T> struct DynamicSharedMem
{
__device__ __forceinline__ operator T*()
{
extern __shared__ int __smem[];
return (T*)__smem;
}
__device__ __forceinline__ operator const T*() const
{
extern __shared__ int __smem[];
return (T*)__smem;
}
};
// specialize for double to avoid unaligned memory access compile errors
template<> struct DynamicSharedMem<double>
{
__device__ __forceinline__ operator double*()
{
extern __shared__ double __smem_d[];
return (double*)__smem_d;
}
__device__ __forceinline__ operator const double*() const
{
extern __shared__ double __smem_d[];
return (double*)__smem_d;
}
};
}}}
//! @endcond
#endif // OPENCV_CUDA_DYNAMIC_SMEM_HPP
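A minimal usage sketch (illustrative names): the conversion operators hand back the dynamically sized shared buffer declared at launch time via the third <<<>>> parameter.

__global__ void scaleViaShared(float* data, float k, int n)
{
    cv::cuda::device::DynamicSharedMem<float> shared;
    float* buf = shared; // the extern __shared__ storage, typed as float*
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        buf[threadIdx.x] = data[i] * k; // stage the result through shared memory
        data[i] = buf[threadIdx.x];
    }
}

// launch with the buffer size in bytes:
// scaleViaShared<<<grid, block, block.x * sizeof(float)>>>(d_data, 2.f, n);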

View File

@ -0,0 +1,269 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_EMULATION_HPP_
#define OPENCV_CUDA_EMULATION_HPP_
#include "common.hpp"
#include "warp_reduce.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
struct Emulation
{
static __device__ __forceinline__ int syncthreadsOr(int pred)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
// just a compilation stub
return 0;
#else
return __syncthreads_or(pred);
#endif
}
template<int CTA_SIZE>
static __forceinline__ __device__ int Ballot(int predicate)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
return __ballot(predicate);
#else
__shared__ volatile int cta_buffer[CTA_SIZE];
int tid = threadIdx.x;
cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
return warp_reduce(cta_buffer);
#endif
}
struct smem
{
enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };
template<typename T>
static __device__ __forceinline__ T atomicInc(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count;
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
do
{
count = *address & TAG_MASK;
count = tag | (count + 1);
*address = count;
} while (*address != count);
return (count & TAG_MASK) - 1;
#else
return ::atomicInc(address, val);
#endif
}
template<typename T>
static __device__ __forceinline__ T atomicAdd(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count;
unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
do
{
count = *address & TAG_MASK;
count = tag | (count + val);
*address = count;
} while (*address != count);
return (count & TAG_MASK) - val;
#else
return ::atomicAdd(address, val);
#endif
}
template<typename T>
static __device__ __forceinline__ T atomicMin(T* address, T val)
{
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
T count = ::min(*address, val);
do
{
*address = count;
} while (*address > count);
return count;
#else
return ::atomicMin(address, val);
#endif
}
}; // struct smem
struct glob
{
static __device__ __forceinline__ int atomicAdd(int* address, int val)
{
return ::atomicAdd(address, val);
}
static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
{
return ::atomicAdd(address, val);
}
static __device__ __forceinline__ float atomicAdd(float* address, float val)
{
#if __CUDA_ARCH__ >= 200
return ::atomicAdd(address, val);
#else
int* address_as_i = (int*) address;
int old = *address_as_i, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_i, assumed,
__float_as_int(val + __int_as_float(assumed)));
} while (assumed != old);
return __int_as_float(old);
#endif
}
static __device__ __forceinline__ double atomicAdd(double* address, double val)
{
#if __CUDA_ARCH__ >= 130
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
return __longlong_as_double(old);
#else
(void) address;
(void) val;
return 0.0;
#endif
}
static __device__ __forceinline__ int atomicMin(int* address, int val)
{
return ::atomicMin(address, val);
}
static __device__ __forceinline__ float atomicMin(float* address, float val)
{
#if __CUDA_ARCH__ >= 120
int* address_as_i = (int*) address;
int old = *address_as_i, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_i, assumed,
__float_as_int(::fminf(val, __int_as_float(assumed))));
} while (assumed != old);
return __int_as_float(old);
#else
(void) address;
(void) val;
return 0.0f;
#endif
}
static __device__ __forceinline__ double atomicMin(double* address, double val)
{
#if __CUDA_ARCH__ >= 130
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_ull, assumed,
__double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
} while (assumed != old);
return __longlong_as_double(old);
#else
(void) address;
(void) val;
return 0.0;
#endif
}
static __device__ __forceinline__ int atomicMax(int* address, int val)
{
return ::atomicMax(address, val);
}
static __device__ __forceinline__ float atomicMax(float* address, float val)
{
#if __CUDA_ARCH__ >= 120
int* address_as_i = (int*) address;
int old = *address_as_i, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_i, assumed,
__float_as_int(::fmaxf(val, __int_as_float(assumed))));
} while (assumed != old);
return __int_as_float(old);
#else
(void) address;
(void) val;
return 0.0f;
#endif
}
static __device__ __forceinline__ double atomicMax(double* address, double val)
{
#if __CUDA_ARCH__ >= 130
unsigned long long int* address_as_ull = (unsigned long long int*) address;
unsigned long long int old = *address_as_ull, assumed;
do {
assumed = old;
old = ::atomicCAS(address_as_ull, assumed,
__double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
} while (assumed != old);
return __longlong_as_double(old);
#else
(void) address;
(void) val;
return 0.0;
#endif
}
};
}; //struct Emulation
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_EMULATION_HPP_ */
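A small usage sketch (illustrative): accumulating floats through Emulation::glob::atomicAdd, which resolves to hardware atomics where available and otherwise to the CAS loop above.

__global__ void sumAll(const float* in, float* total, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        cv::cuda::device::Emulation::glob::atomicAdd(total, in[i]);
}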

View File

@ -0,0 +1,286 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_FILTERS_HPP
#define OPENCV_CUDA_FILTERS_HPP
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "vec_math.hpp"
#include "type_traits.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <typename Ptr2D> struct PointFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
(void)fx;
(void)fy;
}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
return src(__float2int_rz(y), __float2int_rz(x));
}
Ptr2D src;
};
template <typename Ptr2D> struct LinearFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
(void)fx;
(void)fy;
}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0);
const int x1 = __float2int_rd(x);
const int y1 = __float2int_rd(y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
elem_type src_reg = src(y1, x1);
out = out + src_reg * ((x2 - x) * (y2 - y));
src_reg = src(y1, x2);
out = out + src_reg * ((x - x1) * (y2 - y));
src_reg = src(y2, x1);
out = out + src_reg * ((x2 - x) * (y - y1));
src_reg = src(y2, x2);
out = out + src_reg * ((x - x1) * (y - y1));
return saturate_cast<elem_type>(out);
}
Ptr2D src;
};
template <typename Ptr2D> struct CubicFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f)
: src(src_)
{
(void)fx;
(void)fy;
}
static __device__ __forceinline__ float bicubicCoeff(float x_)
{
float x = fabsf(x_);
if (x <= 1.0f)
{
return x * x * (1.5f * x - 2.5f) + 1.0f;
}
else if (x < 2.0f)
{
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
}
else
{
return 0.0f;
}
}
__device__ elem_type operator ()(float y, float x) const
{
const float xmin = ::ceilf(x - 2.0f);
const float xmax = ::floorf(x + 2.0f);
const float ymin = ::ceilf(y - 2.0f);
const float ymax = ::floorf(y + 2.0f);
work_type sum = VecTraits<work_type>::all(0);
float wsum = 0.0f;
for (float cy = ymin; cy <= ymax; cy += 1.0f)
{
for (float cx = xmin; cx <= xmax; cx += 1.0f)
{
const float w = bicubicCoeff(x - cx) * bicubicCoeff(y - cy);
sum = sum + w * src(__float2int_rd(cy), __float2int_rd(cx));
wsum += w;
}
}
work_type res = (!wsum)? VecTraits<work_type>::all(0) : sum / wsum;
return saturate_cast<elem_type>(res);
}
Ptr2D src;
};
// for integer scaling
template <typename Ptr2D> struct IntegerAreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
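// Area (box) downsampling for integer scale factors: each destination pixel is
// the average of the scale_x x scale_y source block it covers.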
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for(int dy = sy1; dy < sy2; ++dy)
for(int dx = sx1; dx < sx2; ++dx)
{
out = out + src(dy, dx) * scale;
}
return saturate_cast<elem_type>(out);
}
Ptr2D src;
float scale_x, scale_y, scale;
};
template <typename Ptr2D> struct AreaFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
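// Generalisation of IntegerAreaFilter to fractional scale factors: rows and
// columns only partially covered by the source window are weighted by their
// actual coverage.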
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
float fsx1 = x * scale_x;
float fsx2 = fsx1 + scale_x;
int sx1 = __float2int_ru(fsx1);
int sx2 = __float2int_rd(fsx2);
float fsy1 = y * scale_y;
float fsy2 = fsy1 + scale_y;
int sy1 = __float2int_ru(fsy1);
int sy2 = __float2int_rd(fsy2);
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0.f);
for (int dy = sy1; dy < sy2; ++dy)
{
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(dy, dx) * scale;
if (sx1 > fsx1)
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
if (sx2 < fsx2)
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
}
if (sy1 > fsy1)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
if (sy2 < fsy2)
for (int dx = sx1; dx < sx2; ++dx)
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
if ((sy1 > fsy1) && (sx1 > fsx1))
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
if ((sy1 > fsy1) && (sx2 < fsx2))
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx2 < fsx2))
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
if ((sy2 < fsy2) && (sx1 > fsx1))
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
return saturate_cast<elem_type>(out);
}
Ptr2D src;
float scale_x, scale_y;
int width, height;
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_FILTERS_HPP
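For orientation, a minimal sketch of how these interpolation functors are consumed (assuming an nvcc build with this header on the include path). The DevPtrF wrapper and resizeLinear kernel below are illustrative assumptions, not part of the header: any type with an elem_type typedef and a (row, col) operator() satisfies the Ptr2D concept. Real OpenCV callers wrap the source in a dedicated border-handling object; here clamp-to-edge addressing stands in for that.

#include "opencv2/core/cuda/filters.hpp"

// Hypothetical accessor satisfying the Ptr2D concept (elem_type + operator()(y, x)).
struct DevPtrF
{
    typedef float elem_type;
    const float* data;
    int step;        // row stride in elements
    int cols, rows;  // image size, used for clamp-to-edge addressing
    __device__ __forceinline__ float operator ()(int y, int x) const
    {
        x = ::min(::max(x, 0), cols - 1);
        y = ::min(::max(y, 0), rows - 1);
        return data[y * step + x];
    }
};

// Bilinear resize of a single-channel float image: dst(y, x) samples src at (y * fy, x * fx).
__global__ void resizeLinear(DevPtrF src, float* dst, int dcols, int drows, float fx, float fy)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < dcols && y < drows)
    {
        cv::cuda::device::LinearFilter<DevPtrF> filter(src);
        dst[y * dcols + x] = filter(y * fy, x * fx);
    }
}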

View File

@ -0,0 +1,79 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
#define OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP
#include <cstdio>
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template<class Func>
void printFuncAttrib(Func& func)
{
cudaFuncAttributes attrs;
cudaFuncGetAttributes(&attrs, func);
printf("=== Function stats ===\n");
printf("Name: \n");
printf("sharedSizeBytes = %d\n", attrs.sharedSizeBytes);
printf("constSizeBytes = %d\n", attrs.constSizeBytes);
printf("localSizeBytes = %d\n", attrs.localSizeBytes);
printf("maxThreadsPerBlock = %d\n", attrs.maxThreadsPerBlock);
printf("numRegs = %d\n", attrs.numRegs);
printf("ptxVersion = %d\n", attrs.ptxVersion);
printf("binaryVersion = %d\n", attrs.binaryVersion);
printf("\n");
fflush(stdout);
}
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_DEVICE_FUNCATTRIB_HPP */
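A short usage sketch (the dummyKernel name is an illustrative assumption): pass any __global__ function to printFuncAttrib to dump its compiled resource usage, which is handy when tuning launch configurations.

#include "opencv2/core/cuda/funcattrib.hpp"

__global__ void dummyKernel() {}

int main()
{
    // Prints shared/constant/local memory sizes, register count and PTX/binary versions.
    cv::cuda::device::printFuncAttrib(dummyKernel);
    return 0;
}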

View File

@ -0,0 +1,811 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_FUNCTIONAL_HPP
#define OPENCV_CUDA_FUNCTIONAL_HPP
#include <functional>
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "type_traits.hpp"
#include "device_functions.h"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// Function Objects
#ifdef CV_CXX11
template<typename Argument, typename Result> struct unary_function
{
typedef Argument argument_type;
typedef Result result_type;
};
template<typename Argument1, typename Argument2, typename Result> struct binary_function
{
typedef Argument1 first_argument_type;
typedef Argument2 second_argument_type;
typedef Result result_type;
};
#else
template<typename Argument, typename Result> struct unary_function : public std::unary_function<Argument, Result> {};
template<typename Argument1, typename Argument2, typename Result> struct binary_function : public std::binary_function<Argument1, Argument2, Result> {};
#endif
// Arithmetic Operations
template <typename T> struct plus : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a + b;
}
__host__ __device__ __forceinline__ plus() {}
__host__ __device__ __forceinline__ plus(const plus&) {}
};
template <typename T> struct minus : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a - b;
}
__host__ __device__ __forceinline__ minus() {}
__host__ __device__ __forceinline__ minus(const minus&) {}
};
template <typename T> struct multiplies : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a * b;
}
__host__ __device__ __forceinline__ multiplies() {}
__host__ __device__ __forceinline__ multiplies(const multiplies&) {}
};
template <typename T> struct divides : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a / b;
}
__host__ __device__ __forceinline__ divides() {}
__host__ __device__ __forceinline__ divides(const divides&) {}
};
template <typename T> struct modulus : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a % b;
}
__host__ __device__ __forceinline__ modulus() {}
__host__ __device__ __forceinline__ modulus(const modulus&) {}
};
template <typename T> struct negate : unary_function<T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a) const
{
return -a;
}
__host__ __device__ __forceinline__ negate() {}
__host__ __device__ __forceinline__ negate(const negate&) {}
};
// Comparison Operations
template <typename T> struct equal_to : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a == b;
}
__host__ __device__ __forceinline__ equal_to() {}
__host__ __device__ __forceinline__ equal_to(const equal_to&) {}
};
template <typename T> struct not_equal_to : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a != b;
}
__host__ __device__ __forceinline__ not_equal_to() {}
__host__ __device__ __forceinline__ not_equal_to(const not_equal_to&) {}
};
template <typename T> struct greater : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a > b;
}
__host__ __device__ __forceinline__ greater() {}
__host__ __device__ __forceinline__ greater(const greater&) {}
};
template <typename T> struct less : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a < b;
}
__host__ __device__ __forceinline__ less() {}
__host__ __device__ __forceinline__ less(const less&) {}
};
template <typename T> struct greater_equal : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a >= b;
}
__host__ __device__ __forceinline__ greater_equal() {}
__host__ __device__ __forceinline__ greater_equal(const greater_equal&) {}
};
template <typename T> struct less_equal : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a <= b;
}
__host__ __device__ __forceinline__ less_equal() {}
__host__ __device__ __forceinline__ less_equal(const less_equal&) {}
};
// Logical Operations
template <typename T> struct logical_and : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a && b;
}
__host__ __device__ __forceinline__ logical_and() {}
__host__ __device__ __forceinline__ logical_and(const logical_and&) {}
};
template <typename T> struct logical_or : binary_function<T, T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a || b;
}
__host__ __device__ __forceinline__ logical_or() {}
__host__ __device__ __forceinline__ logical_or(const logical_or&) {}
};
template <typename T> struct logical_not : unary_function<T, bool>
{
__device__ __forceinline__ bool operator ()(typename TypeTraits<T>::ParameterType a) const
{
return !a;
}
__host__ __device__ __forceinline__ logical_not() {}
__host__ __device__ __forceinline__ logical_not(const logical_not&) {}
};
// Bitwise Operations
template <typename T> struct bit_and : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a & b;
}
__host__ __device__ __forceinline__ bit_and() {}
__host__ __device__ __forceinline__ bit_and(const bit_and&) {}
};
template <typename T> struct bit_or : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a | b;
}
__host__ __device__ __forceinline__ bit_or() {}
__host__ __device__ __forceinline__ bit_or(const bit_or&) {}
};
template <typename T> struct bit_xor : binary_function<T, T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType a,
typename TypeTraits<T>::ParameterType b) const
{
return a ^ b;
}
__host__ __device__ __forceinline__ bit_xor() {}
__host__ __device__ __forceinline__ bit_xor(const bit_xor&) {}
};
template <typename T> struct bit_not : unary_function<T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType v) const
{
return ~v;
}
__host__ __device__ __forceinline__ bit_not() {}
__host__ __device__ __forceinline__ bit_not(const bit_not&) {}
};
// Generalized Identity Operations
template <typename T> struct identity : unary_function<T, T>
{
__device__ __forceinline__ typename TypeTraits<T>::ParameterType operator()(typename TypeTraits<T>::ParameterType x) const
{
return x;
}
__host__ __device__ __forceinline__ identity() {}
__host__ __device__ __forceinline__ identity(const identity&) {}
};
template <typename T1, typename T2> struct project1st : binary_function<T1, T2, T1>
{
__device__ __forceinline__ typename TypeTraits<T1>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
{
return lhs;
}
__host__ __device__ __forceinline__ project1st() {}
__host__ __device__ __forceinline__ project1st(const project1st&) {}
};
template <typename T1, typename T2> struct project2nd : binary_function<T1, T2, T2>
{
__device__ __forceinline__ typename TypeTraits<T2>::ParameterType operator()(typename TypeTraits<T1>::ParameterType lhs, typename TypeTraits<T2>::ParameterType rhs) const
{
return rhs;
}
__host__ __device__ __forceinline__ project2nd() {}
__host__ __device__ __forceinline__ project2nd(const project2nd&) {}
};
// Min/Max Operations
#define OPENCV_CUDA_IMPLEMENT_MINMAX(name, type, op) \
template <> struct name<type> : binary_function<type, type, type> \
{ \
__device__ __forceinline__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
__host__ __device__ __forceinline__ name() {}\
__host__ __device__ __forceinline__ name(const name&) {}\
};
template <typename T> struct maximum : binary_function<T, T, T>
{
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
{
return max(lhs, rhs);
}
__host__ __device__ __forceinline__ maximum() {}
__host__ __device__ __forceinline__ maximum(const maximum&) {}
};
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uchar, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, schar, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, char, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, ushort, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, short, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, int, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, uint, ::max)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, float, ::fmax)
OPENCV_CUDA_IMPLEMENT_MINMAX(maximum, double, ::fmax)
template <typename T> struct minimum : binary_function<T, T, T>
{
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType lhs, typename TypeTraits<T>::ParameterType rhs) const
{
return min(lhs, rhs);
}
__host__ __device__ __forceinline__ minimum() {}
__host__ __device__ __forceinline__ minimum(const minimum&) {}
};
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uchar, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, schar, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, char, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, ushort, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, short, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, int, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, uint, ::min)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, float, ::fmin)
OPENCV_CUDA_IMPLEMENT_MINMAX(minimum, double, ::fmin)
#undef OPENCV_CUDA_IMPLEMENT_MINMAX
// Math functions
template <typename T> struct abs_func : unary_function<T, T>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType x) const
{
return abs(x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned char> : unary_function<unsigned char, unsigned char>
{
__device__ __forceinline__ unsigned char operator ()(unsigned char x) const
{
return x;
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<signed char> : unary_function<signed char, signed char>
{
__device__ __forceinline__ signed char operator ()(signed char x) const
{
return ::abs((int)x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<char> : unary_function<char, char>
{
__device__ __forceinline__ char operator ()(char x) const
{
return ::abs((int)x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned short> : unary_function<unsigned short, unsigned short>
{
__device__ __forceinline__ unsigned short operator ()(unsigned short x) const
{
return x;
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<short> : unary_function<short, short>
{
__device__ __forceinline__ short operator ()(short x) const
{
return ::abs((int)x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<unsigned int> : unary_function<unsigned int, unsigned int>
{
__device__ __forceinline__ unsigned int operator ()(unsigned int x) const
{
return x;
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<int> : unary_function<int, int>
{
__device__ __forceinline__ int operator ()(int x) const
{
return ::abs(x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<float> : unary_function<float, float>
{
__device__ __forceinline__ float operator ()(float x) const
{
return ::fabsf(x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
template <> struct abs_func<double> : unary_function<double, double>
{
__device__ __forceinline__ double operator ()(double x) const
{
return ::fabs(x);
}
__host__ __device__ __forceinline__ abs_func() {}
__host__ __device__ __forceinline__ abs_func(const abs_func&) {}
};
#define OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(name, func) \
template <typename T> struct name ## _func : unary_function<T, float> \
{ \
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
{ \
return func ## f(v); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : unary_function<double, double> \
{ \
__device__ __forceinline__ double operator ()(double v) const \
{ \
return func(v); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
#define OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(name, func) \
template <typename T> struct name ## _func : binary_function<T, T, float> \
{ \
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
{ \
return func ## f(v1, v2); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
}; \
template <> struct name ## _func<double> : binary_function<double, double, double> \
{ \
__device__ __forceinline__ double operator ()(double v1, double v2) const \
{ \
return func(v1, v2); \
} \
__host__ __device__ __forceinline__ name ## _func() {} \
__host__ __device__ __forceinline__ name ## _func(const name ## _func&) {} \
};
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sqrt, ::sqrt)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp, ::exp)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp2, ::exp2)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(exp10, ::exp10)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log, ::log)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log2, ::log2)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(log10, ::log10)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sin, ::sin)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cos, ::cos)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tan, ::tan)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asin, ::asin)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acos, ::acos)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atan, ::atan)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(sinh, ::sinh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(cosh, ::cosh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(tanh, ::tanh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(asinh, ::asinh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(acosh, ::acosh)
OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR(atanh, ::atanh)
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(hypot, ::hypot)
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(atan2, ::atan2)
OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR(pow, ::pow)
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR
#undef OPENCV_CUDA_IMPLEMENT_UN_FUNCTOR_NO_DOUBLE
#undef OPENCV_CUDA_IMPLEMENT_BIN_FUNCTOR
template<typename T> struct hypot_sqr_func : binary_function<T, T, float>
{
__device__ __forceinline__ T operator ()(typename TypeTraits<T>::ParameterType src1, typename TypeTraits<T>::ParameterType src2) const
{
return src1 * src1 + src2 * src2;
}
__host__ __device__ __forceinline__ hypot_sqr_func() {}
__host__ __device__ __forceinline__ hypot_sqr_func(const hypot_sqr_func&) {}
};
// Saturate Cast Functor
template <typename T, typename D> struct saturate_cast_func : unary_function<T, D>
{
__device__ __forceinline__ D operator ()(typename TypeTraits<T>::ParameterType v) const
{
return saturate_cast<D>(v);
}
__host__ __device__ __forceinline__ saturate_cast_func() {}
__host__ __device__ __forceinline__ saturate_cast_func(const saturate_cast_func&) {}
};
// Threshold Functors
template <typename T> struct thresh_binary_func : unary_function<T, T>
{
__host__ __device__ __forceinline__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src > thresh) * maxVal;
}
__host__ __device__ __forceinline__ thresh_binary_func() {}
__host__ __device__ __forceinline__ thresh_binary_func(const thresh_binary_func& other)
: thresh(other.thresh), maxVal(other.maxVal) {}
T thresh;
T maxVal;
};
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
{
__host__ __device__ __forceinline__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src <= thresh) * maxVal;
}
__host__ __device__ __forceinline__ thresh_binary_inv_func() {}
__host__ __device__ __forceinline__ thresh_binary_inv_func(const thresh_binary_inv_func& other)
: thresh(other.thresh), maxVal(other.maxVal) {}
T thresh;
T maxVal;
};
template <typename T> struct thresh_trunc_func : unary_function<T, T>
{
explicit __host__ __device__ __forceinline__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return minimum<T>()(src, thresh);
}
__host__ __device__ __forceinline__ thresh_trunc_func() {}
__host__ __device__ __forceinline__ thresh_trunc_func(const thresh_trunc_func& other)
: thresh(other.thresh) {}
T thresh;
};
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
{
explicit __host__ __device__ __forceinline__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src > thresh) * src;
}
__host__ __device__ __forceinline__ thresh_to_zero_func() {}
__host__ __device__ __forceinline__ thresh_to_zero_func(const thresh_to_zero_func& other)
: thresh(other.thresh) {}
T thresh;
};
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
{
explicit __host__ __device__ __forceinline__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {(void)maxVal_;}
__device__ __forceinline__ T operator()(typename TypeTraits<T>::ParameterType src) const
{
return (src <= thresh) * src;
}
__host__ __device__ __forceinline__ thresh_to_zero_inv_func() {}
__host__ __device__ __forceinline__ thresh_to_zero_inv_func(const thresh_to_zero_inv_func& other)
: thresh(other.thresh) {}
T thresh;
};
// Function Object Adaptors
template <typename Predicate> struct unary_negate : unary_function<typename Predicate::argument_type, bool>
{
explicit __host__ __device__ __forceinline__ unary_negate(const Predicate& p) : pred(p) {}
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::argument_type>::ParameterType x) const
{
return !pred(x);
}
__host__ __device__ __forceinline__ unary_negate() {}
__host__ __device__ __forceinline__ unary_negate(const unary_negate& other) : pred(other.pred) {}
Predicate pred;
};
template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
{
return unary_negate<Predicate>(pred);
}
template <typename Predicate> struct binary_negate : binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>
{
explicit __host__ __device__ __forceinline__ binary_negate(const Predicate& p) : pred(p) {}
__device__ __forceinline__ bool operator()(typename TypeTraits<typename Predicate::first_argument_type>::ParameterType x,
typename TypeTraits<typename Predicate::second_argument_type>::ParameterType y) const
{
return !pred(x,y);
}
__host__ __device__ __forceinline__ binary_negate() {}
__host__ __device__ __forceinline__ binary_negate(const binary_negate& other) : pred(other.pred) {}
Predicate pred;
};
template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
{
return binary_negate<BinaryPredicate>(pred);
}
template <typename Op> struct binder1st : unary_function<typename Op::second_argument_type, typename Op::result_type>
{
__host__ __device__ __forceinline__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}
__device__ __forceinline__ typename Op::result_type operator ()(typename TypeTraits<typename Op::second_argument_type>::ParameterType a) const
{
return op(arg1, a);
}
__host__ __device__ __forceinline__ binder1st() {}
__host__ __device__ __forceinline__ binder1st(const binder1st& other) : op(other.op), arg1(other.arg1) {}
Op op;
typename Op::first_argument_type arg1;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
{
return binder1st<Op>(op, typename Op::first_argument_type(x));
}
template <typename Op> struct binder2nd : unary_function<typename Op::first_argument_type, typename Op::result_type>
{
__host__ __device__ __forceinline__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}
__forceinline__ __device__ typename Op::result_type operator ()(typename TypeTraits<typename Op::first_argument_type>::ParameterType a) const
{
return op(a, arg2);
}
__host__ __device__ __forceinline__ binder2nd() {}
__host__ __device__ __forceinline__ binder2nd(const binder2nd& other) : op(other.op), arg2(other.arg2) {}
Op op;
typename Op::second_argument_type arg2;
};
template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
{
return binder2nd<Op>(op, typename Op::second_argument_type(x));
}
// Functor Traits
template <typename F> struct IsUnaryFunction
{
typedef char Yes;
struct No {Yes a[2];};
template <typename T, typename D> static Yes check(unary_function<T, D>);
static No check(...);
static F makeF();
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};
template <typename F> struct IsBinaryFunction
{
typedef char Yes;
struct No {Yes a[2];};
template <typename T1, typename T2, typename D> static Yes check(binary_function<T1, T2, D>);
static No check(...);
static F makeF();
enum { value = (sizeof(check(makeF())) == sizeof(Yes)) };
};
namespace functional_detail
{
template <size_t src_elem_size, size_t dst_elem_size> struct UnOpShift { enum { shift = 1 }; };
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 1> { enum { shift = 4 }; };
template <size_t src_elem_size> struct UnOpShift<src_elem_size, 2> { enum { shift = 2 }; };
template <typename T, typename D> struct DefaultUnaryShift
{
enum { shift = UnOpShift<sizeof(T), sizeof(D)>::shift };
};
template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size> struct BinOpShift { enum { shift = 1 }; };
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 1> { enum { shift = 4 }; };
template <size_t src_elem_size1, size_t src_elem_size2> struct BinOpShift<src_elem_size1, src_elem_size2, 2> { enum { shift = 2 }; };
template <typename T1, typename T2, typename D> struct DefaultBinaryShift
{
enum { shift = BinOpShift<sizeof(T1), sizeof(T2), sizeof(D)>::shift };
};
template <typename Func, bool unary = IsUnaryFunction<Func>::value> struct ShiftDispatcher;
template <typename Func> struct ShiftDispatcher<Func, true>
{
enum { shift = DefaultUnaryShift<typename Func::argument_type, typename Func::result_type>::shift };
};
template <typename Func> struct ShiftDispatcher<Func, false>
{
enum { shift = DefaultBinaryShift<typename Func::first_argument_type, typename Func::second_argument_type, typename Func::result_type>::shift };
};
}
template <typename Func> struct DefaultTransformShift
{
enum { shift = functional_detail::ShiftDispatcher<Func>::shift };
};
template <typename Func> struct DefaultTransformFunctorTraits
{
enum { simple_block_dim_x = 16 };
enum { simple_block_dim_y = 16 };
enum { smart_block_dim_x = 16 };
enum { smart_block_dim_y = 16 };
enum { smart_shift = DefaultTransformShift<Func>::shift };
};
template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
#define OPENCV_CUDA_TRANSFORM_FUNCTOR_TRAITS(type) \
template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_FUNCTIONAL_HPP
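A minimal sketch of how these functors are consumed (thresholdKernel is an illustrative assumption; inside OpenCV they are applied by cv::cuda transform-style wrappers). The constructors are __host__ __device__ precisely so a functor can be built on the host and passed to a kernel by value:

#include "opencv2/core/cuda/functional.hpp"

// Elementwise THRESH_BINARY: dst[i] = (src[i] > thresh) ? maxVal : 0.
__global__ void thresholdKernel(const float* src, float* dst, int n,
                                cv::cuda::device::thresh_binary_func<float> op)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = op(src[i]);
}

// Launch example:
//   thresholdKernel<<<grid, block>>>(d_src, d_dst, n,
//       cv::cuda::device::thresh_binary_func<float>(0.5f, 1.0f));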

View File

@ -0,0 +1,128 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_LIMITS_HPP
#define OPENCV_CUDA_LIMITS_HPP
#include <limits.h>
#include <float.h>
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <class T> struct numeric_limits;
template <> struct numeric_limits<bool>
{
__device__ __forceinline__ static bool min() { return false; }
__device__ __forceinline__ static bool max() { return true; }
static const bool is_signed = false;
};
template <> struct numeric_limits<signed char>
{
__device__ __forceinline__ static signed char min() { return SCHAR_MIN; }
__device__ __forceinline__ static signed char max() { return SCHAR_MAX; }
static const bool is_signed = true;
};
template <> struct numeric_limits<unsigned char>
{
__device__ __forceinline__ static unsigned char min() { return 0; }
__device__ __forceinline__ static unsigned char max() { return UCHAR_MAX; }
static const bool is_signed = false;
};
template <> struct numeric_limits<short>
{
__device__ __forceinline__ static short min() { return SHRT_MIN; }
__device__ __forceinline__ static short max() { return SHRT_MAX; }
static const bool is_signed = true;
};
template <> struct numeric_limits<unsigned short>
{
__device__ __forceinline__ static unsigned short min() { return 0; }
__device__ __forceinline__ static unsigned short max() { return USHRT_MAX; }
static const bool is_signed = false;
};
template <> struct numeric_limits<int>
{
__device__ __forceinline__ static int min() { return INT_MIN; }
__device__ __forceinline__ static int max() { return INT_MAX; }
static const bool is_signed = true;
};
template <> struct numeric_limits<unsigned int>
{
__device__ __forceinline__ static unsigned int min() { return 0; }
__device__ __forceinline__ static unsigned int max() { return UINT_MAX; }
static const bool is_signed = false;
};
template <> struct numeric_limits<float>
{
__device__ __forceinline__ static float min() { return FLT_MIN; }
__device__ __forceinline__ static float max() { return FLT_MAX; }
__device__ __forceinline__ static float epsilon() { return FLT_EPSILON; }
static const bool is_signed = true;
};
template <> struct numeric_limits<double>
{
__device__ __forceinline__ static double min() { return DBL_MIN; }
__device__ __forceinline__ static double max() { return DBL_MAX; }
__device__ __forceinline__ static double epsilon() { return DBL_EPSILON; }
static const bool is_signed = true;
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_LIMITS_HPP
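A small sketch of why this header exists: std::numeric_limits is not usable from __device__ code on the toolchains this header targets, so kernels use this drop-in instead, e.g. to seed a running minimum. The single-thread minKernel below is an illustrative assumption, launched with <<<1, 1>>>:

#include "opencv2/core/cuda/limits.hpp"

__global__ void minKernel(const float* src, int n, float* result)
{
    // Seed the search with the largest representable float.
    float best = cv::cuda::device::numeric_limits<float>::max();
    for (int i = 0; i < n; ++i)
        best = fminf(best, src[i]);
    *result = best;
}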

View File

@ -0,0 +1,209 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_REDUCE_HPP
#define OPENCV_CUDA_REDUCE_HPP
#ifndef THRUST_DEBUG // eliminate -Wundef warning
#define THRUST_DEBUG 0
#endif
#include <thrust/tuple.h>
#include "detail/reduce.hpp"
#include "detail/reduce_key_val.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <int N, typename T, class Op>
__device__ __forceinline__ void reduce(volatile T* smem, T& val, unsigned int tid, const Op& op)
{
reduce_detail::Dispatcher<N>::reductor::template reduce<volatile T*, T&, const Op&>(smem, val, tid, op);
}
template <int N,
typename P0, typename P1, typename P2, typename P3, typename P4, typename P5, typename P6, typename P7, typename P8, typename P9,
typename R0, typename R1, typename R2, typename R3, typename R4, typename R5, typename R6, typename R7, typename R8, typename R9,
class Op0, class Op1, class Op2, class Op3, class Op4, class Op5, class Op6, class Op7, class Op8, class Op9>
__device__ __forceinline__ void reduce(const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>& smem,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>& val,
unsigned int tid,
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>& op)
{
reduce_detail::Dispatcher<N>::reductor::template reduce<
const thrust::tuple<P0, P1, P2, P3, P4, P5, P6, P7, P8, P9>&,
const thrust::tuple<R0, R1, R2, R3, R4, R5, R6, R7, R8, R9>&,
const thrust::tuple<Op0, Op1, Op2, Op3, Op4, Op5, Op6, Op7, Op8, Op9>&>(smem, val, tid, op);
}
template <unsigned int N, typename K, typename V, class Cmp>
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key, volatile V* svals, V& val, unsigned int tid, const Cmp& cmp)
{
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&, volatile V*, V&, const Cmp&>(skeys, key, svals, val, tid, cmp);
}
template <unsigned int N,
typename K,
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp>
__device__ __forceinline__ void reduceKeyVal(volatile K* skeys, K& key,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
unsigned int tid, const Cmp& cmp)
{
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<volatile K*, K&,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
const Cmp&>(skeys, key, svals, val, tid, cmp);
}
template <unsigned int N,
typename KP0, typename KP1, typename KP2, typename KP3, typename KP4, typename KP5, typename KP6, typename KP7, typename KP8, typename KP9,
typename KR0, typename KR1, typename KR2, typename KR3, typename KR4, typename KR5, typename KR6, typename KR7, typename KR8, typename KR9,
typename VP0, typename VP1, typename VP2, typename VP3, typename VP4, typename VP5, typename VP6, typename VP7, typename VP8, typename VP9,
typename VR0, typename VR1, typename VR2, typename VR3, typename VR4, typename VR5, typename VR6, typename VR7, typename VR8, typename VR9,
class Cmp0, class Cmp1, class Cmp2, class Cmp3, class Cmp4, class Cmp5, class Cmp6, class Cmp7, class Cmp8, class Cmp9>
__device__ __forceinline__ void reduceKeyVal(const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>& skeys,
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>& key,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>& svals,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>& val,
unsigned int tid,
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>& cmp)
{
reduce_key_val_detail::Dispatcher<N>::reductor::template reduce<
const thrust::tuple<KP0, KP1, KP2, KP3, KP4, KP5, KP6, KP7, KP8, KP9>&,
const thrust::tuple<KR0, KR1, KR2, KR3, KR4, KR5, KR6, KR7, KR8, KR9>&,
const thrust::tuple<VP0, VP1, VP2, VP3, VP4, VP5, VP6, VP7, VP8, VP9>&,
const thrust::tuple<VR0, VR1, VR2, VR3, VR4, VR5, VR6, VR7, VR8, VR9>&,
const thrust::tuple<Cmp0, Cmp1, Cmp2, Cmp3, Cmp4, Cmp5, Cmp6, Cmp7, Cmp8, Cmp9>&
>(skeys, key, svals, val, tid, cmp);
}
// smem_tuple
template <typename T0>
__device__ __forceinline__
thrust::tuple<volatile T0*>
smem_tuple(T0* t0)
{
return thrust::make_tuple((volatile T0*) t0);
}
template <typename T0, typename T1>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*>
smem_tuple(T0* t0, T1* t1)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1);
}
template <typename T0, typename T1, typename T2>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*>
smem_tuple(T0* t0, T1* t1, T2* t2)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2);
}
template <typename T0, typename T1, typename T2, typename T3>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8);
}
template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9>
__device__ __forceinline__
thrust::tuple<volatile T0*, volatile T1*, volatile T2*, volatile T3*, volatile T4*, volatile T5*, volatile T6*, volatile T7*, volatile T8*, volatile T9*>
smem_tuple(T0* t0, T1* t1, T2* t2, T3* t3, T4* t4, T5* t5, T6* t6, T7* t7, T8* t8, T9* t9)
{
return thrust::make_tuple((volatile T0*) t0, (volatile T1*) t1, (volatile T2*) t2, (volatile T3*) t3, (volatile T4*) t4, (volatile T5*) t5, (volatile T6*) t6, (volatile T7*) t7, (volatile T8*) t8, (volatile T9*) t9);
}
}}}
//! @endcond
#endif // OPENCV_CUDA_REDUCE_HPP
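The canonical usage pattern, sketched under the assumption of a power-of-two block size (blockSum is illustrative): each thread contributes one value, reduce<N> combines them through shared memory, and thread 0 ends up holding the block-wide result.

#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/functional.hpp"

template <int BLOCK_SIZE>
__global__ void blockSum(const float* src, float* partialSums, int n)
{
    __shared__ float smem[BLOCK_SIZE];

    const unsigned int tid = threadIdx.x;
    const int i = blockIdx.x * BLOCK_SIZE + tid;

    float val = (i < n) ? src[i] : 0.0f;
    cv::cuda::device::reduce<BLOCK_SIZE>(smem, val, tid, cv::cuda::device::plus<float>());

    if (tid == 0)
        partialSums[blockIdx.x] = val; // block-wide sum lands in thread 0
}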

View File

@ -0,0 +1,292 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_SATURATE_CAST_HPP
#define OPENCV_CUDA_SATURATE_CAST_HPP
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
{
uint res = 0;
int vi = v;
asm("cvt.sat.u8.s8 %0, %1;" : "=r"(res) : "r"(vi));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
{
uint res = 0;
asm("cvt.sat.u8.s16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
{
uint res = 0;
asm("cvt.sat.u8.u16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
{
uint res = 0;
asm("cvt.sat.u8.s32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
{
uint res = 0;
asm("cvt.sat.u8.u32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
{
uint res = 0;
asm("cvt.rni.sat.u8.f32 %0, %1;" : "=r"(res) : "f"(v));
return res;
}
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
uint res = 0;
asm("cvt.rni.sat.u8.f64 %0, %1;" : "=r"(res) : "d"(v));
return res;
#else
return saturate_cast<uchar>((float)v);
#endif
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
{
uint res = 0;
uint vi = v;
asm("cvt.sat.s8.u8 %0, %1;" : "=r"(res) : "r"(vi));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
{
uint res = 0;
asm("cvt.sat.s8.s16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
{
uint res = 0;
asm("cvt.sat.s8.u16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
{
uint res = 0;
asm("cvt.sat.s8.s32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
{
uint res = 0;
asm("cvt.sat.s8.u32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
{
uint res = 0;
asm("cvt.rni.sat.s8.f32 %0, %1;" : "=r"(res) : "f"(v));
return res;
}
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
uint res = 0;
asm("cvt.rni.sat.s8.f64 %0, %1;" : "=r"(res) : "d"(v));
return res;
#else
return saturate_cast<schar>((float)v);
#endif
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
{
ushort res = 0;
int vi = v;
asm("cvt.sat.u16.s8 %0, %1;" : "=h"(res) : "r"(vi));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
{
ushort res = 0;
asm("cvt.sat.u16.s16 %0, %1;" : "=h"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
{
ushort res = 0;
asm("cvt.sat.u16.s32 %0, %1;" : "=h"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
{
ushort res = 0;
asm("cvt.sat.u16.u32 %0, %1;" : "=h"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
{
ushort res = 0;
asm("cvt.rni.sat.u16.f32 %0, %1;" : "=h"(res) : "f"(v));
return res;
}
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
ushort res = 0;
asm("cvt.rni.sat.u16.f64 %0, %1;" : "=h"(res) : "d"(v));
return res;
#else
return saturate_cast<ushort>((float)v);
#endif
}
template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
{
short res = 0;
asm("cvt.sat.s16.u16 %0, %1;" : "=h"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
{
short res = 0;
asm("cvt.sat.s16.s32 %0, %1;" : "=h"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
{
short res = 0;
asm("cvt.sat.s16.u32 %0, %1;" : "=h"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
{
short res = 0;
asm("cvt.rni.sat.s16.f32 %0, %1;" : "=h"(res) : "f"(v));
return res;
}
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
short res = 0;
asm("cvt.rni.sat.s16.f64 %0, %1;" : "=h"(res) : "d"(v));
return res;
#else
return saturate_cast<short>((float)v);
#endif
}
template<> __device__ __forceinline__ int saturate_cast<int>(uint v)
{
int res = 0;
asm("cvt.sat.s32.u32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ int saturate_cast<int>(float v)
{
return __float2int_rn(v);
}
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
return __double2int_rn(v);
#else
return saturate_cast<int>((float)v);
#endif
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(schar v)
{
uint res = 0;
int vi = v;
asm("cvt.sat.u32.s8 %0, %1;" : "=r"(res) : "r"(vi));
return res;
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(short v)
{
uint res = 0;
asm("cvt.sat.u32.s16 %0, %1;" : "=r"(res) : "h"(v));
return res;
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(int v)
{
uint res = 0;
asm("cvt.sat.u32.s32 %0, %1;" : "=r"(res) : "r"(v));
return res;
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
{
return __float2uint_rn(v);
}
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 130
return __double2uint_rn(v);
#else
return saturate_cast<uint>((float)v);
#endif
}
}}}
//! @endcond
#endif /* OPENCV_CUDA_SATURATE_CAST_HPP */
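// Illustrative appendix (not part of the original header): the cvt.rni.sat
// PTX paths above round to nearest even, then clamp to the destination range.
// A minimal host-side reference of that semantic for the schar/float case;
// the name is hypothetical and the default FP rounding mode is assumed.
#include <cmath>
#include <cstdint>
static inline std::int8_t ref_saturate_cast_s8(float v)
{
    float r = std::nearbyintf(v);    // round to nearest, ties to even
    if (r < -128.f) return -128;     // saturate low
    if (r >  127.f) return  127;     // saturate high
    return static_cast<std::int8_t>(r);
}
// e.g. ref_saturate_cast_s8(200.f) == 127, ref_saturate_cast_s8(2.5f) == 2 (tie to even)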

View File

@ -0,0 +1,258 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_SCAN_HPP
#define OPENCV_CUDA_SCAN_HPP
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"
#include "opencv2/core/cuda/warp.hpp"
#include "opencv2/core/cuda/warp_shuffle.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
enum ScanKind { EXCLUSIVE = 0, INCLUSIVE = 1 };
template <ScanKind Kind, typename T, typename F> struct WarpScan
{
__device__ __forceinline__ WarpScan() {}
__device__ __forceinline__ WarpScan(const WarpScan& other) { (void)other; }
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
{
const unsigned int lane = idx & 31;
F op;
if ( lane >= 1) ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
if ( lane >= 2) ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
if ( lane >= 4) ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
if ( lane >= 8) ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
if ( lane >= 16) ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
if( Kind == INCLUSIVE )
return ptr [idx];
else
return (lane > 0) ? ptr [idx - 1] : 0;
}
__device__ __forceinline__ unsigned int index(const unsigned int tid)
{
return tid;
}
__device__ __forceinline__ void init(volatile T *ptr){}
static const int warp_offset = 0;
typedef WarpScan<INCLUSIVE, T, F> merge;
};
template <ScanKind Kind , typename T, typename F> struct WarpScanNoComp
{
__device__ __forceinline__ WarpScanNoComp() {}
__device__ __forceinline__ WarpScanNoComp(const WarpScanNoComp& other) { (void)other; }
__device__ __forceinline__ T operator()( volatile T *ptr , const unsigned int idx)
{
const unsigned int lane = threadIdx.x & 31;
F op;
ptr [idx ] = op(ptr [idx - 1], ptr [idx]);
ptr [idx ] = op(ptr [idx - 2], ptr [idx]);
ptr [idx ] = op(ptr [idx - 4], ptr [idx]);
ptr [idx ] = op(ptr [idx - 8], ptr [idx]);
ptr [idx ] = op(ptr [idx - 16], ptr [idx]);
if( Kind == INCLUSIVE )
return ptr [idx];
else
return (lane > 0) ? ptr [idx - 1] : 0;
}
__device__ __forceinline__ unsigned int index(const unsigned int tid)
{
return (tid >> warp_log) * warp_smem_stride + 16 + (tid & warp_mask);
}
__device__ __forceinline__ void init(volatile T *ptr)
{
ptr[threadIdx.x] = 0;
}
static const int warp_smem_stride = 32 + 16 + 1;
static const int warp_offset = 16;
static const int warp_log = 5;
static const int warp_mask = 31;
typedef WarpScanNoComp<INCLUSIVE, T, F> merge;
};
template <ScanKind Kind , typename T, typename Sc, typename F> struct BlockScan
{
__device__ __forceinline__ BlockScan() {}
__device__ __forceinline__ BlockScan(const BlockScan& other) { (void)other; }
__device__ __forceinline__ T operator()(volatile T *ptr)
{
const unsigned int tid = threadIdx.x;
const unsigned int lane = tid & warp_mask;
const unsigned int warp = tid >> warp_log;
Sc scan;
typename Sc::merge merge_scan;
const unsigned int idx = scan.index(tid);
T val = scan(ptr, idx);
__syncthreads ();
if( warp == 0)
scan.init(ptr);
__syncthreads ();
if( lane == 31 )
ptr [scan.warp_offset + warp ] = (Kind == INCLUSIVE) ? val : ptr [idx];
__syncthreads ();
if( warp == 0 )
merge_scan(ptr, idx);
__syncthreads();
if ( warp > 0)
val = ptr [scan.warp_offset + warp - 1] + val;
__syncthreads ();
ptr[idx] = val;
__syncthreads ();
return val ;
}
static const int warp_log = 5;
static const int warp_mask = 31;
};
template <typename T>
__device__ T warpScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
#if __CUDA_ARCH__ >= 300
const unsigned int laneId = cv::cuda::device::Warp::laneId();
// scan using warp shuffle functions
#pragma unroll
for (int i = 1; i <= (OPENCV_CUDA_WARP_SIZE / 2); i *= 2)
{
const T n = cv::cuda::device::shfl_up(idata, i);
if (laneId >= i)
idata += n;
}
return idata;
#else
unsigned int pos = 2 * tid - (tid & (OPENCV_CUDA_WARP_SIZE - 1));
s_Data[pos] = 0;
pos += OPENCV_CUDA_WARP_SIZE;
s_Data[pos] = idata;
s_Data[pos] += s_Data[pos - 1];
s_Data[pos] += s_Data[pos - 2];
s_Data[pos] += s_Data[pos - 4];
s_Data[pos] += s_Data[pos - 8];
s_Data[pos] += s_Data[pos - 16];
return s_Data[pos];
#endif
}
template <typename T>
__device__ __forceinline__ T warpScanExclusive(T idata, volatile T* s_Data, unsigned int tid)
{
return warpScanInclusive(idata, s_Data, tid) - idata;
}
template <int tiNumScanThreads, typename T>
__device__ T blockScanInclusive(T idata, volatile T* s_Data, unsigned int tid)
{
if (tiNumScanThreads > OPENCV_CUDA_WARP_SIZE)
{
//Bottom-level inclusive warp scan
T warpResult = warpScanInclusive(idata, s_Data, tid);
//Save top elements of each warp for exclusive warp scan
//sync to wait for warp scans to complete (because s_Data is being overwritten)
__syncthreads();
if ((tid & (OPENCV_CUDA_WARP_SIZE - 1)) == (OPENCV_CUDA_WARP_SIZE - 1))
{
s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE] = warpResult;
}
//wait for warp scans to complete
__syncthreads();
if (tid < (tiNumScanThreads / OPENCV_CUDA_WARP_SIZE) )
{
//grab top warp elements
T val = s_Data[tid];
//calculate exclusive scan and write back to shared memory
s_Data[tid] = warpScanExclusive(val, s_Data, tid);
}
//return updated warp scans with exclusive scan results
__syncthreads();
return warpResult + s_Data[tid >> OPENCV_CUDA_LOG_WARP_SIZE];
}
else
{
return warpScanInclusive(idata, s_Data, tid);
}
}
}}}
//! @endcond
#endif // OPENCV_CUDA_SCAN_HPP
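// Illustrative appendix (not part of the original header): a CPU reference
// for the two ScanKind semantics above, to pin down what the warp/block
// scans compute.  INCLUSIVE: out[i] = in[0] + ... + in[i]; EXCLUSIVE:
// out[i] = in[0] + ... + in[i-1] with out[0] = 0.  Names are hypothetical.
#include <cstddef>
#include <vector>
template <bool Inclusive>
std::vector<int> ref_scan(const std::vector<int>& in)
{
    std::vector<int> out(in.size());
    int sum = 0;
    for (std::size_t i = 0; i < in.size(); ++i)
    {
        if (Inclusive) { sum += in[i]; out[i] = sum; } // running total includes in[i]
        else           { out[i] = sum; sum += in[i]; } // total before in[i]
    }
    return out;
}
// ref_scan<true >({1,2,3,4}) -> {1,3,6,10}
// ref_scan<false>({1,2,3,4}) -> {0,1,3,6}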

View File

@ -0,0 +1,869 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
/*
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* Neither the name of NVIDIA Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef OPENCV_CUDA_SIMD_FUNCTIONS_HPP
#define OPENCV_CUDA_SIMD_FUNCTIONS_HPP
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// 2
static __device__ __forceinline__ unsigned int vadd2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = a ^ b; // sum bits
r = a + b; // actual sum
s = s ^ r; // determine carry-ins for each bit position
s = s & 0x00010000; // carry-in to high word (= carry-out from low word)
r = r - s; // subtract out carry-out from low word
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsub2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = a ^ b; // difference bits
r = a - b; // actual difference
s = s ^ r; // determine borrow-ins for each bit position
s = s & 0x00010000; // borrow to high word
r = r + s; // compensate for borrow from low word
#endif
return r;
}
static __device__ __forceinline__ unsigned int vabsdiff2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff2.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.u32.u32.u32.sat %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t, u, v;
s = a & 0x0000ffff; // extract low halfword
r = b & 0x0000ffff; // extract low halfword
u = ::max(r, s); // maximum of low halfwords
v = ::min(r, s); // minimum of low halfwords
s = a & 0xffff0000; // extract high halfword
r = b & 0xffff0000; // extract high halfword
t = ::max(r, s); // maximum of high halfwords
s = ::min(r, s); // minimum of high halfwords
r = u | t; // maximum of both halfwords
s = v | s; // minimum of both halfwords
r = r - s; // |a - b| = max(a,b) - min(a,b);
#endif
return r;
}
static __device__ __forceinline__ unsigned int vavg2(unsigned int a, unsigned int b)
{
unsigned int r, s;
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
s = a ^ b;
r = a & b;
s = s & 0xfffefffe; // ensure shift doesn't cross halfword boundaries
s = s >> 1;
s = r + s;
return s;
}
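// Worked example of the HAKMEM #23 trick per 16-bit lane (illustrative):
//   a = 0x00070003, b = 0x00050001
//   a & b = 0x00050001, a ^ b = 0x00020002
//   the 0xfffefffe mask clears bit 16 before the shift, so >> 1 cannot
//   leak the high lane into the low lane: (a ^ b) >> 1 = 0x00010001
//   result = 0x00050001 + 0x00010001 = 0x00060002, i.e. (7+5)/2 = 6, (3+1)/2 = 2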
static __device__ __forceinline__ unsigned int vavrg2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vavrg2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
unsigned int s;
s = a ^ b;
r = a | b;
s = s & 0xfffefffe; // ensure shift doesn't cross half-word boundaries
s = s >> 1;
r = r - s;
#endif
return r;
}
static __device__ __forceinline__ unsigned int vseteq2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r & ~c; // msb = 1, if r was 0x0000
r = c >> 15; // convert to bool
#endif
return r;
}
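// Worked example of the zero-halfword test above (illustrative):
//   a = 0x12340005, b = 0x12340007
//   r = a ^ b = 0x00000002 (0x0000 only in the equal high lane)
//   the msb bookkeeping leaves the msb set only in lanes where r was
//   0x0000, and >> 15 turns those msbs into per-lane booleans:
//   vseteq2(a, b) = 0x00010000 (high lane equal, low lane not)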
static __device__ __forceinline__ unsigned int vcmpeq2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vseteq2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
r = r ^ c; // extract msbs, msb = 1 if r < 0x8000
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r & ~c; // msb = 1, if r was 0x0000
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetge2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpge2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetge2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg2(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetgt2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80008000; // msbs = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpgt2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetgt2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg2(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80008000; // msbs = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetle2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmple2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetle2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg2(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetlt2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset2.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmplt2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetlt2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg2(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80008000; // msb = carry-outs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetne2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm ("vset2.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r | c; // msb = 1, if r was not 0x0000
c = c & 0x80008000; // extract msbs
r = c >> 15; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpne2(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetne2(a, b);
c = r << 16; // convert bool
r = c - r; // into mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
r = a ^ b; // 0x0000 if a == b
c = r | 0x80008000; // set msbs, to catch carry out
c = c - 0x00010001; // msb = 0, if r was 0x0000 or 0x8000
c = r | c; // msb = 1, if r was not 0x0000
c = c & 0x80008000; // extract msbs
r = c >> 15; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vmax2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmax.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t, u;
r = a & 0x0000ffff; // extract low halfword
s = b & 0x0000ffff; // extract low halfword
t = ::max(r, s); // maximum of low halfwords
r = a & 0xffff0000; // extract high halfword
s = b & 0xffff0000; // extract high halfword
u = ::max(r, s); // maximum of high halfwords
r = t | u; // combine halfword maximums
#endif
return r;
}
static __device__ __forceinline__ unsigned int vmin2(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin2.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmin.u32.u32.u32 %0.h0, %1.h0, %2.h0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.h1, %1.h1, %2.h1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t, u;
r = a & 0x0000ffff; // extract low halfword
s = b & 0x0000ffff; // extract low halfword
t = ::min(r, s); // minimum of low halfwords
r = a & 0xffff0000; // extract high halfword
s = b & 0xffff0000; // extract high halfword
u = ::min(r, s); // minimum of high halfwords
r = t | u; // combine halfword minimums
#endif
return r;
}
// 4
static __device__ __forceinline__ unsigned int vadd4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vadd4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vadd.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vadd.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t;
s = a ^ b; // sum bits
r = a & 0x7f7f7f7f; // clear msbs
t = b & 0x7f7f7f7f; // clear msbs
s = s & 0x80808080; // msb sum bits
r = r + t; // add without msbs, record carry-out in msbs
r = r ^ s; // sum of msb sum and carry-in bits, w/o carry-out
#endif /* __CUDA_ARCH__ >= 300 */
return r;
}
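// Per-byte walkthrough of the fallback path (illustrative): clearing the
// msbs keeps carries from crossing byte boundaries, and the msb sum bits
// are xor-ed back in afterwards.  Note this wraps modulo 256 per byte,
// while the asm paths above are saturating (.sat):
//   a = 0xff, b = 0x02: (0xff & 0x7f) + (0x02 & 0x7f) = 0x81,
//   msb sum bit (a ^ b) & 0x80 = 0x80, 0x81 ^ 0x80 = 0x01 (vs 0xff saturated)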
static __device__ __forceinline__ unsigned int vsub4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vsub4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vsub.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vsub.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s, t;
s = a ^ ~b; // inverted sum bits
r = a | 0x80808080; // set msbs
t = b & 0x7f7f7f7f; // clear msbs
s = s & 0x80808080; // inverted msb sum bits
r = r - t; // subtract w/o msbs, record inverted borrows in msb
r = r ^ s; // combine inverted msb sum bits and borrows
#endif
return r;
}
static __device__ __forceinline__ unsigned int vavg4(unsigned int a, unsigned int b)
{
unsigned int r, s;
// HAKMEM #23: a + b = 2 * (a & b) + (a ^ b) ==>
// (a + b) / 2 = (a & b) + ((a ^ b) >> 1)
s = a ^ b;
r = a & b;
s = s & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
s = s >> 1;
s = r + s;
return s;
}
static __device__ __forceinline__ unsigned int vavrg4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vavrg4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// HAKMEM #23: a + b = 2 * (a | b) - (a ^ b) ==>
// (a + b + 1) / 2 = (a | b) - ((a ^ b) >> 1)
unsigned int c;
c = a ^ b;
r = a | b;
c = c & 0xfefefefe; // ensure following shift doesn't cross byte boundaries
c = c >> 1;
r = r - c;
#endif
return r;
}
static __device__ __forceinline__ unsigned int vseteq4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.eq %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x00 if a == b
c = r | 0x80808080; // set msbs, to catch carry out
r = r ^ c; // extract msbs, msb = 1 if r < 0x80
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
c = r & ~c; // msb = 1, if r was 0x00
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpeq4(unsigned int a, unsigned int b)
{
unsigned int r, t;
#if __CUDA_ARCH__ >= 300
r = vseteq4(a, b);
t = r << 8; // convert bool
r = t - r; // to mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
t = a ^ b; // 0x00 if a == b
r = t | 0x80808080; // set msbs, to catch carry out
t = t ^ r; // extract msbs, msb = 1 if t < 0x80
r = r - 0x01010101; // msb = 0, if t was 0x00 or 0x80
r = t & ~r; // msb = 1, if t was 0x00
t = r >> 7; // build mask
t = r - t; // from
r = t | r; // msbs
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetle4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.le %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmple4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetle4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavrg4(a, b); // (b + ~a + 1) / 2 = (b - a) / 2
c = c & 0x80808080; // msbs = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetlt4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.lt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmplt4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetlt4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(a));
c = vavg4(a, b); // (b + ~a) / 2 = (b - a) / 2 [rounded down]
c = c & 0x80808080; // msbs = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetge4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.ge %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavrg4(a, b); // (a + ~b + 1) / 2 = (a - b) / 2
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpge4(unsigned int a, unsigned int b)
{
unsigned int r, s;
#if __CUDA_ARCH__ >= 300
r = vsetge4(a, b);
s = r << 8; // convert bool
r = s - r; // to mask
#else
asm ("not.b32 %0,%0;" : "+r"(b));
r = vavrg4 (a, b); // (a + ~b + 1) / 2 = (a - b) / 2
r = r & 0x80808080; // msb = carry-outs
s = r >> 7; // build mask
s = r - s; // from
r = s | r; // msbs
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetgt4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.gt %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int c;
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpgt4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetgt4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
asm("not.b32 %0, %0;" : "+r"(b));
c = vavg4(a, b); // (a + ~b) / 2 = (a - b) / 2 [rounded down]
c = c & 0x80808080; // msb = carry-outs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vsetne4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vset4.u32.u32.ne %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
unsigned int c;
r = a ^ b; // 0x00 if a == b
c = r | 0x80808080; // set msbs, to catch carry out
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
c = r | c; // msb = 1, if r was not 0x00
c = c & 0x80808080; // extract msbs
r = c >> 7; // convert to bool
#endif
return r;
}
static __device__ __forceinline__ unsigned int vcmpne4(unsigned int a, unsigned int b)
{
unsigned int r, c;
#if __CUDA_ARCH__ >= 300
r = vsetne4(a, b);
c = r << 8; // convert bool
r = c - r; // to mask
#else
// inspired by Alan Mycroft's null-byte detection algorithm:
// null_byte(x) = ((x - 0x01010101) & (~x & 0x80808080))
r = a ^ b; // 0x00 if a == b
c = r | 0x80808080; // set msbs, to catch carry out
c = c - 0x01010101; // msb = 0, if r was 0x00 or 0x80
c = r | c; // msb = 1, if r was not 0x00
c = c & 0x80808080; // extract msbs
r = c >> 7; // convert
r = c - r; // msbs to
r = c | r; // mask
#endif
return r;
}
static __device__ __forceinline__ unsigned int vabsdiff4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vabsdiff4.u32.u32.u32.sat %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vabsdiff.u32.u32.u32.sat %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vabsdiff.u32.u32.u32.sat %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = vcmpge4(a, b); // mask = 0xff if a >= b
r = a ^ b; //
s = (r & s) ^ b; // select a when a >= b, else select b => max(a,b)
r = s ^ r; // select a when b >= a, else select b => min(a,b)
r = s - r; // |a - b| = max(a,b) - min(a,b);
#endif
return r;
}
static __device__ __forceinline__ unsigned int vmax4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmax4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmax.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmax.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = vcmpge4(a, b); // mask = 0xff if a >= b
r = a & s; // select a when a >= b
s = b & ~s; // select b when a < b
r = r | s; // combine byte selections
#endif
return r; // byte-wise unsigned maximum
}
static __device__ __forceinline__ unsigned int vmin4(unsigned int a, unsigned int b)
{
unsigned int r = 0;
#if __CUDA_ARCH__ >= 300
asm("vmin4.u32.u32.u32 %0, %1, %2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#elif __CUDA_ARCH__ >= 200
asm("vmin.u32.u32.u32 %0.b0, %1.b0, %2.b0, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b1, %1.b1, %2.b1, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b2, %1.b2, %2.b2, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
asm("vmin.u32.u32.u32 %0.b3, %1.b3, %2.b3, %3;" : "=r"(r) : "r"(a), "r"(b), "r"(r));
#else
unsigned int s;
s = vcmpge4(b, a); // mask = 0xff if b >= a
r = a & s; // select a when b >= a
s = b & ~s; // select b when b < a
r = r | s; // combine byte selections
#endif
return r;
}
}}}
//! @endcond
#endif // OPENCV_CUDA_SIMD_FUNCTIONS_HPP
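// Illustrative appendix (not part of the original header): a host-side
// scalar reference that makes the per-lane semantics testable; the name
// and the check in main are hypothetical.
#include <cstdint>
#include <cstdio>
// Reference for vavrg2: rounded average (a + b + 1) / 2 in each 16-bit lane.
static std::uint32_t ref_vavrg2(std::uint32_t a, std::uint32_t b)
{
    std::uint32_t lo = (((a & 0xffffu) + (b & 0xffffu) + 1u) >> 1) & 0xffffu;
    std::uint32_t hi = (((a >> 16)     + (b >> 16)     + 1u) >> 1) & 0xffffu;
    return (hi << 16) | lo;
}
int main()
{
    // lanes (7,3) and (5,2) -> rounded averages (6,3)
    std::printf("%08x\n", ref_vavrg2(0x00070003u, 0x00050002u)); // prints 00060003
    return 0;
}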

View File

@ -0,0 +1,75 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_TRANSFORM_HPP
#define OPENCV_CUDA_TRANSFORM_HPP
#include "common.hpp"
#include "utility.hpp"
#include "detail/transform_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <typename T, typename D, typename UnOp, typename Mask>
static inline void transform(PtrStepSz<T> src, PtrStepSz<D> dst, UnOp op, const Mask& mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<UnOp> ft;
transform_detail::TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
}
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
static inline void transform(PtrStepSz<T1> src1, PtrStepSz<T2> src2, PtrStepSz<D> dst, BinOp op, const Mask& mask, cudaStream_t stream)
{
typedef TransformFunctorTraits<BinOp> ft;
transform_detail::TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
}
}}}
//! @endcond
#endif // OPENCV_CUDA_TRANSFORM_HPP
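// Illustrative usage sketch (not part of the original header): transform()
// accepts any functor with a matching operator(); AddScalar and addScalar
// below are hypothetical, assuming the surrounding OpenCV CUDA headers.
using namespace cv::cuda::device; // sketch-only; brings transform, WithOutMask, etc. into scope
struct AddScalar
{
    uchar val;
    __device__ __forceinline__ uchar operator()(uchar x) const
    {
        return saturate_cast<uchar>(x + val); // clamp the widened sum back to uchar
    }
};
static void addScalar(PtrStepSz<uchar> src, PtrStepSz<uchar> dst, uchar v, cudaStream_t stream)
{
    AddScalar op;
    op.val = v;
    transform(src, dst, op, WithOutMask(), stream); // unmasked, element-wise
}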

View File

@ -0,0 +1,90 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_TYPE_TRAITS_HPP
#define OPENCV_CUDA_TYPE_TRAITS_HPP
#include "detail/type_traits_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <typename T> struct IsSimpleParameter
{
enum {value = type_traits_detail::IsIntegral<T>::value || type_traits_detail::IsFloat<T>::value ||
type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<T>::type>::value};
};
template <typename T> struct TypeTraits
{
typedef typename type_traits_detail::UnConst<T>::type NonConstType;
typedef typename type_traits_detail::UnVolatile<T>::type NonVolatileType;
typedef typename type_traits_detail::UnVolatile<typename type_traits_detail::UnConst<T>::type>::type UnqualifiedType;
typedef typename type_traits_detail::PointerTraits<UnqualifiedType>::type PointeeType;
typedef typename type_traits_detail::ReferenceTraits<T>::type ReferredType;
enum { isConst = type_traits_detail::UnConst<T>::value };
enum { isVolatile = type_traits_detail::UnVolatile<T>::value };
enum { isReference = type_traits_detail::ReferenceTraits<UnqualifiedType>::value };
enum { isPointer = type_traits_detail::PointerTraits<typename type_traits_detail::ReferenceTraits<UnqualifiedType>::type>::value };
enum { isUnsignedInt = type_traits_detail::IsUnsignedIntegral<UnqualifiedType>::value };
enum { isSignedInt = type_traits_detail::IsSignedIntergral<UnqualifiedType>::value };
enum { isIntegral = type_traits_detail::IsIntegral<UnqualifiedType>::value };
enum { isFloat = type_traits_detail::IsFloat<UnqualifiedType>::value };
enum { isArith = isIntegral || isFloat };
enum { isVec = type_traits_detail::IsVec<UnqualifiedType>::value };
typedef typename type_traits_detail::Select<IsSimpleParameter<UnqualifiedType>::value,
T, typename type_traits_detail::AddParameterType<T>::type>::type ParameterType;
};
}}}
//! @endcond
#endif // OPENCV_CUDA_TYPE_TRAITS_HPP
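// Illustrative appendix (not part of the original header): C++03-style
// compile-time checks of the traits above; the tt_check names are
// hypothetical and the negative-size-array trick stands in for static_assert.
namespace tt_check
{
    using cv::cuda::device::TypeTraits;
    typedef char check_const   [TypeTraits<const int>::isConst      ? 1 : -1];
    typedef char check_float   [TypeTraits<volatile float>::isFloat ? 1 : -1];
    typedef char check_stripped[sizeof(TypeTraits<const volatile int>::UnqualifiedType) == sizeof(int) ? 1 : -1];
}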

View File

@ -0,0 +1,230 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_UTILITY_HPP
#define OPENCV_CUDA_UTILITY_HPP
#include "saturate_cast.hpp"
#include "datamov_utils.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
struct CV_EXPORTS ThrustAllocator
{
typedef uchar value_type;
virtual ~ThrustAllocator();
virtual __device__ __host__ uchar* allocate(size_t numBytes) = 0;
virtual __device__ __host__ void deallocate(uchar* ptr, size_t numBytes) = 0;
static ThrustAllocator& getAllocator();
static void setAllocator(ThrustAllocator* allocator);
};
#define OPENCV_CUDA_LOG_WARP_SIZE (5)
#define OPENCV_CUDA_WARP_SIZE (1 << OPENCV_CUDA_LOG_WARP_SIZE)
#define OPENCV_CUDA_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on Fermi, 16 on Tesla
#define OPENCV_CUDA_MEM_BANKS (1 << OPENCV_CUDA_LOG_MEM_BANKS)
///////////////////////////////////////////////////////////////////////////////
// swap
template <typename T> void __device__ __host__ __forceinline__ swap(T& a, T& b)
{
const T temp = a;
a = b;
b = temp;
}
///////////////////////////////////////////////////////////////////////////////
// Mask Reader
struct SingleMask
{
explicit __host__ __device__ __forceinline__ SingleMask(PtrStepb mask_) : mask(mask_) {}
__host__ __device__ __forceinline__ SingleMask(const SingleMask& mask_): mask(mask_.mask){}
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x] != 0;
}
PtrStepb mask;
};
struct SingleMaskChannels
{
__host__ __device__ __forceinline__ SingleMaskChannels(PtrStepb mask_, int channels_)
: mask(mask_), channels(channels_) {}
__host__ __device__ __forceinline__ SingleMaskChannels(const SingleMaskChannels& mask_)
:mask(mask_.mask), channels(mask_.channels){}
__device__ __forceinline__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x / channels] != 0;
}
PtrStepb mask;
int channels;
};
struct MaskCollection
{
explicit __host__ __device__ __forceinline__ MaskCollection(PtrStepb* maskCollection_)
: maskCollection(maskCollection_) {}
__device__ __forceinline__ MaskCollection(const MaskCollection& masks_)
: maskCollection(masks_.maskCollection), curMask(masks_.curMask){}
__device__ __forceinline__ void next()
{
curMask = *maskCollection++;
}
__device__ __forceinline__ void setMask(int z)
{
curMask = maskCollection[z];
}
__device__ __forceinline__ bool operator()(int y, int x) const
{
uchar val;
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(y), x, val), (val != 0));
}
const PtrStepb* maskCollection;
PtrStepb curMask;
};
struct WithOutMask
{
__host__ __device__ __forceinline__ WithOutMask(){}
__host__ __device__ __forceinline__ WithOutMask(const WithOutMask&){}
__device__ __forceinline__ void next() const
{
}
__device__ __forceinline__ void setMask(int) const
{
}
__device__ __forceinline__ bool operator()(int, int) const
{
return true;
}
__device__ __forceinline__ bool operator()(int, int, int) const
{
return true;
}
static __device__ __forceinline__ bool check(int, int)
{
return true;
}
static __device__ __forceinline__ bool check(int, int, int)
{
return true;
}
};
///////////////////////////////////////////////////////////////////////////////
// Solve linear system
// solve 2x2 linear system Ax=b
template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
{
T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];
if (det != 0)
{
double invdet = 1.0 / det;
x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));
return true;
}
return false;
}
// solve 3x3 linear system Ax=b
template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
{
T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
- A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
+ A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);
if (det != 0)
{
double invdet = 1.0 / det;
x[0] = saturate_cast<T>(invdet *
(b[0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
A[0][1] * (b[1] * A[2][2] - A[1][2] * b[2] ) +
A[0][2] * (b[1] * A[2][1] - A[1][1] * b[2] )));
x[1] = saturate_cast<T>(invdet *
(A[0][0] * (b[1] * A[2][2] - A[1][2] * b[2] ) -
b[0] * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
A[0][2] * (A[1][0] * b[2] - b[1] * A[2][0])));
x[2] = saturate_cast<T>(invdet *
(A[0][0] * (A[1][1] * b[2] - b[1] * A[2][1]) -
A[0][1] * (A[1][0] * b[2] - b[1] * A[2][0]) +
b[0] * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));
return true;
}
return false;
}
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_UTILITY_HPP
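// Illustrative appendix (not part of the original header): solve2x2 above is
// plain Cramer's rule.  Worked instance: A = [[2,1],[1,3]], b = [5,10] gives
// det = 5, x0 = (5*3 - 10*1)/5 = 1, x1 = (2*10 - 1*5)/5 = 3.  The demo
// kernel below is hypothetical.
__global__ void solve2x2Demo(float* out)
{
    const float A[2][2] = { {2.f, 1.f}, {1.f, 3.f} };
    const float b[2]    = { 5.f, 10.f };
    float x[2];
    if (cv::cuda::device::solve2x2(A, b, x))
    {
        out[0] = x[0]; // 1
        out[1] = x[1]; // 3
    }
}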

View File

@ -0,0 +1,232 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_VEC_DISTANCE_HPP
#define OPENCV_CUDA_VEC_DISTANCE_HPP
#include "reduce.hpp"
#include "functional.hpp"
#include "detail/vec_distance_detail.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <typename T> struct L1Dist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ L1Dist() : mySum(0) {}
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum = __sad(val1, val2, mySum);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
}
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
template <> struct L1Dist<float>
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
mySum += ::fabs(val1 - val2);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
}
__device__ __forceinline__ operator float() const
{
return mySum;
}
float mySum;
};
struct L2Dist
{
typedef float value_type;
typedef float result_type;
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
float reg = val1 - val2;
mySum += reg * reg;
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(float* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<float>());
}
__device__ __forceinline__ operator float() const
{
return sqrtf(mySum);
}
float mySum;
};
struct HammingDist
{
typedef int value_type;
typedef int result_type;
__device__ __forceinline__ HammingDist() : mySum(0) {}
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum += __popc(val1 ^ val2);
}
template <int THREAD_DIM> __device__ __forceinline__ void reduceAll(int* smem, int tid)
{
reduce<THREAD_DIM>(smem, mySum, tid, plus<int>());
}
__device__ __forceinline__ operator int() const
{
return mySum;
}
int mySum;
};
// calc distance between two vectors in global memory
template <int THREAD_DIM, typename Dist, typename T1, typename T2>
__device__ void calcVecDiffGlobal(const T1* vec1, const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
for (int i = tid; i < len; i += THREAD_DIM)
{
T1 val1;
ForceGlob<T1>::Load(vec1, i, val1);
T2 val2;
ForceGlob<T2>::Load(vec2, i, val2);
dist.reduceIter(val1, val2);
}
dist.reduceAll<THREAD_DIM>(smem, tid);
}
// calc distance between two vectors, first vector is cached in register or shared memory, second vector is in global memory
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename Dist, typename T1, typename T2>
__device__ __forceinline__ void calcVecDiffCached(const T1* vecCached, const T2* vecGlob, int len, Dist& dist, typename Dist::result_type* smem, int tid)
{
vec_distance_detail::VecDiffCachedCalculator<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>::calc(vecCached, vecGlob, len, dist, tid);
dist.reduceAll<THREAD_DIM>(smem, tid);
}
// calc distance between two vectors in global memory
template <int THREAD_DIM, typename T1> struct VecDiffGlobal
{
explicit __device__ __forceinline__ VecDiffGlobal(const T1* vec1_, int = 0, void* = 0, int = 0, int = 0)
{
vec1 = vec1_;
}
template <typename T2, typename Dist>
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
{
calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, tid);
}
const T1* vec1;
};
// calc distance between two vectors, first vector is cached in register memory, second vector is in global memory
template <int THREAD_DIM, int MAX_LEN, bool LEN_EQ_MAX_LEN, typename U> struct VecDiffCachedRegister
{
template <typename T1> __device__ __forceinline__ VecDiffCachedRegister(const T1* vec1, int len, U* smem, int glob_tid, int tid)
{
if (glob_tid < len)
smem[glob_tid] = vec1[glob_tid];
__syncthreads();
U* vec1ValsPtr = vec1Vals;
#pragma unroll
for (int i = tid; i < MAX_LEN; i += THREAD_DIM)
*vec1ValsPtr++ = smem[i];
__syncthreads();
}
template <typename T2, typename Dist>
__device__ __forceinline__ void calc(const T2* vec2, int len, Dist& dist, typename Dist::result_type* smem, int tid) const
{
calcVecDiffCached<THREAD_DIM, MAX_LEN, LEN_EQ_MAX_LEN>(vec1Vals, vec2, len, dist, smem, tid);
}
U vec1Vals[MAX_LEN / THREAD_DIM];
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_VEC_DISTANCE_HPP
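// Illustrative usage sketch (not part of the original header): one block of
// THREAD_DIM threads cooperatively reduces a single L2 distance; the kernel
// name is hypothetical and reduce<> semantics are assumed from reduce.hpp.
template <int THREAD_DIM>
__global__ void l2DistanceDemo(const float* vec1, const float* vec2, int len, float* result)
{
    __shared__ float smem[THREAD_DIM];
    cv::cuda::device::L2Dist dist;
    cv::cuda::device::calcVecDiffGlobal<THREAD_DIM>(vec1, vec2, len, dist, smem, threadIdx.x);
    if (threadIdx.x == 0)
        *result = dist; // operator float() applies the final sqrtf
}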

View File

@ -0,0 +1,930 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_VECMATH_HPP
#define OPENCV_CUDA_VECMATH_HPP
#include "vec_traits.hpp"
#include "saturate_cast.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
// saturate_cast
namespace vec_math_detail
{
template <int cn, typename VecD> struct SatCastHelper;
template <typename VecD> struct SatCastHelper<1, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x));
}
};
template <typename VecD> struct SatCastHelper<2, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
}
};
template <typename VecD> struct SatCastHelper<3, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
}
};
template <typename VecD> struct SatCastHelper<4, VecD>
{
template <typename VecS> static __device__ __forceinline__ VecD cast(const VecS& v)
{
typedef typename VecTraits<VecD>::elem_type D;
return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
}
};
template <typename VecD, typename VecS> static __device__ __forceinline__ VecD saturate_cast_helper(const VecS& v)
{
return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
}
}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double1& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double2& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double3& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uchar4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const char4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const ushort4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const short4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const uint4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const int4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const float4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
template<typename T> static __device__ __forceinline__ T saturate_cast(const double4& v) {return vec_math_detail::saturate_cast_helper<T>(v);}
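// Usage sketch (illustrative, not part of the API surface above): each lane is
// converted independently with the scalar saturate_cast, so out-of-range values
// clamp per component instead of wrapping:
//
//   float3 v = make_float3(300.f, -5.f, 128.f);
//   uchar3 p = saturate_cast<uchar3>(v); // p == (255, 0, 128)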
// unary operators
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(op, input_type, output_type) \
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a) \
{ \
return VecTraits<output_type ## 1>::make(op (a.x)); \
} \
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a) \
{ \
return VecTraits<output_type ## 2>::make(op (a.x), op (a.y)); \
} \
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a) \
{ \
return VecTraits<output_type ## 3>::make(op (a.x), op (a.y), op (a.z)); \
} \
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a) \
{ \
return VecTraits<output_type ## 4>::make(op (a.x), op (a.y), op (a.z), op (a.w)); \
}
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(-, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(!, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_OP
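// Usage sketch (illustrative): the generated operators act per lane, and logical
// negation always yields a uchar vector holding 0 or 1, as the table above encodes:
//
//   float3 b = -make_float3(1.f, -2.f, 3.f);   // (-1.f, 2.f, -3.f)
//   uchar3 m = !make_int3(0, 7, 0);            // (1, 0, 1)
//   uint3  c = ~make_uint3(0u, 0u, 0u);        // all bits set in every lane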
// unary functions
#define CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(func_name, func, input_type, output_type) \
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a) \
{ \
return VecTraits<output_type ## 1>::make(func (a.x)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a) \
{ \
return VecTraits<output_type ## 2>::make(func (a.x), func (a.y)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a) \
{ \
return VecTraits<output_type ## 3>::make(func (a.x), func (a.y), func (a.z)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a) \
{ \
return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
}
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabs, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrtf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sqrt, ::sqrt, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::expf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp, ::exp, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp2, ::exp2, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(exp10, ::exp10, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::logf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log, ::log, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log2, ::log2, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10f, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(log10, ::log10, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sin, ::sin, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cos, ::cos, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tan, ::tan, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asinf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asin, ::asin, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acosf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acos, ::acos, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atanf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atan, ::atan, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(sinh, ::sinh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::coshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(cosh, ::cosh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(tanh, ::tanh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(asinh, ::asinh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acoshf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(acosh, ::acosh, double, double)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, char, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, short, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, int, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanhf, float, float)
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(atanh, ::atanh, double, double)
#undef CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC
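// Usage sketch (illustrative): the unary math functions apply per lane; integer
// inputs are computed in float (double stays double), per the instantiations above:
//
//   float3 r = sqrt(make_uchar3(4, 9, 16));        // (2.f, 3.f, 4.f)
//   float3 e = exp(make_float3(0.f, 0.f, 0.f));    // (1.f, 1.f, 1.f)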
// binary operators (vec & vec)
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(op, input_type, output_type) \
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, const input_type ## 1 & b) \
{ \
return VecTraits<output_type ## 1>::make(a.x op b.x); \
} \
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, const input_type ## 2 & b) \
{ \
return VecTraits<output_type ## 2>::make(a.x op b.x, a.y op b.y); \
} \
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, const input_type ## 3 & b) \
{ \
return VecTraits<output_type ## 3>::make(a.x op b.x, a.y op b.y, a.z op b.z); \
} \
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, const input_type ## 4 & b) \
{ \
return VecTraits<output_type ## 4>::make(a.x op b.x, a.y op b.y, a.z op b.z, a.w op b.w); \
}
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(+, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(-, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(*, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uchar, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, char, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, ushort, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, short, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(/, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(==, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(!=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(>=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(<=, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&&, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, char, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, ushort, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, short, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, int, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, uint, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, float, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(||, double, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(&, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(|, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_OP(^, uint, uint)
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_OP
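// Usage sketch (illustrative): arithmetic on small integer vectors widens per lane
// to int (uint stays uint), so sums do not wrap; comparisons and logical operators
// return uchar vectors of 0/1:
//
//   uchar3 a = make_uchar3(200, 100, 50), b = make_uchar3(100, 100, 100);
//   int3   s = a + b;   // (300, 200, 150) -- widened, no wraparound
//   uchar3 g = a > b;   // (1, 0, 0)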
// binary operators (vec & scalar)
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(op, input_type, scalar_type, output_type) \
__device__ __forceinline__ output_type ## 1 operator op(const input_type ## 1 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 1>::make(a.x op s); \
} \
__device__ __forceinline__ output_type ## 1 operator op(scalar_type s, const input_type ## 1 & b) \
{ \
return VecTraits<output_type ## 1>::make(s op b.x); \
} \
__device__ __forceinline__ output_type ## 2 operator op(const input_type ## 2 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 2>::make(a.x op s, a.y op s); \
} \
__device__ __forceinline__ output_type ## 2 operator op(scalar_type s, const input_type ## 2 & b) \
{ \
return VecTraits<output_type ## 2>::make(s op b.x, s op b.y); \
} \
__device__ __forceinline__ output_type ## 3 operator op(const input_type ## 3 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 3>::make(a.x op s, a.y op s, a.z op s); \
} \
__device__ __forceinline__ output_type ## 3 operator op(scalar_type s, const input_type ## 3 & b) \
{ \
return VecTraits<output_type ## 3>::make(s op b.x, s op b.y, s op b.z); \
} \
__device__ __forceinline__ output_type ## 4 operator op(const input_type ## 4 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 4>::make(a.x op s, a.y op s, a.z op s, a.w op s); \
} \
__device__ __forceinline__ output_type ## 4 operator op(scalar_type s, const input_type ## 4 & b) \
{ \
return VecTraits<output_type ## 4>::make(s op b.x, s op b.y, s op b.z, s op b.w); \
}
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(+, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(-, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(*, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(/, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(==, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(!=, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(>=, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(<=, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&&, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, char, char, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, ushort, ushort, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, short, short, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, int, int, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, uint, uint, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, float, float, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(||, double, double, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(&, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(|, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP(^, uint, uint, uint)
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_OP
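// Usage sketch (illustrative): a scalar may appear on either side and combines with
// every lane; the scalar's type drives the result type, per the instantiations above:
//
//   int3   r = make_uchar3(10, 20, 30) + 1;          // (11, 21, 31)
//   float3 f = 0.5f * make_float3(2.f, 4.f, 6.f);    // (1.f, 2.f, 3.f)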
// binary functions (vec & vec)
#define CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(func_name, func, input_type, output_type) \
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, const input_type ## 1 & b) \
{ \
return VecTraits<output_type ## 1>::make(func (a.x, b.x)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, const input_type ## 2 & b) \
{ \
return VecTraits<output_type ## 2>::make(func (a.x, b.x), func (a.y, b.y)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, const input_type ## 3 & b) \
{ \
return VecTraits<output_type ## 3>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, const input_type ## 4 & b) \
{ \
return VecTraits<output_type ## 4>::make(func (a.x, b.x), func (a.y, b.y), func (a.z, b.z), func (a.w, b.w)); \
}
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::max, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmaxf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(max, ::fmax, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uchar, uchar)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, char, char)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, ushort, ushort)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, short, short)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, uint, uint)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::min, int, int)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fminf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(min, ::fmin, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypotf, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(hypot, ::hypot, double, double)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uchar, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, char, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, ushort, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, short, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, uint, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, int, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2f, float, float)
CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC(atan2, ::atan2, double, double)
#undef CV_CUDEV_IMPLEMENT_VEC_BINARY_FUNC
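// Usage sketch (illustrative): each name dispatches to the proper intrinsic for the
// element type (::max vs ::fmaxf vs ::fmax), so one spelling works across vector types:
//
//   float3 m = max(make_float3(1.f, 5.f, 3.f), make_float3(4.f, 2.f, 3.f)); // (4.f, 5.f, 3.f)
//   float3 h = hypot(make_int3(3, 6, 9), make_int3(4, 8, 12));              // (5.f, 10.f, 15.f)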
// binary functions (vec & scalar)
#define CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(func_name, func, input_type, scalar_type, output_type) \
__device__ __forceinline__ output_type ## 1 func_name(const input_type ## 1 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 1>::make(func ((output_type) a.x, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 1 func_name(scalar_type s, const input_type ## 1 & b) \
{ \
return VecTraits<output_type ## 1>::make(func ((output_type) s, (output_type) b.x)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(const input_type ## 2 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 2>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 2 func_name(scalar_type s, const input_type ## 2 & b) \
{ \
return VecTraits<output_type ## 2>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(const input_type ## 3 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 3>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 3 func_name(scalar_type s, const input_type ## 3 & b) \
{ \
return VecTraits<output_type ## 3>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(const input_type ## 4 & a, scalar_type s) \
{ \
return VecTraits<output_type ## 4>::make(func ((output_type) a.x, (output_type) s), func ((output_type) a.y, (output_type) s), func ((output_type) a.z, (output_type) s), func ((output_type) a.w, (output_type) s)); \
} \
__device__ __forceinline__ output_type ## 4 func_name(scalar_type s, const input_type ## 4 & b) \
{ \
return VecTraits<output_type ## 4>::make(func ((output_type) s, (output_type) b.x), func ((output_type) s, (output_type) b.y), func ((output_type) s, (output_type) b.z), func ((output_type) s, (output_type) b.w)); \
}
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::max, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmaxf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(max, ::fmax, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uchar, uchar, uchar)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, char, char, char)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, ushort, ushort, ushort)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, short, short, short)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, uint, uint, uint)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::min, int, int, int)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fminf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(min, ::fmin, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypotf, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(hypot, ::hypot, double, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uchar, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uchar, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, char, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, char, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, ushort, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, ushort, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, short, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, short, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, uint, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, uint, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, int, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, int, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2f, float, float, float)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, float, double, double)
CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC(atan2, ::atan2, double, double, double)
#undef CV_CUDEV_IMPLEMENT_SCALAR_BINARY_FUNC
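// Usage sketch (illustrative): both operands are cast to the output type before the
// call, so an integer vector can be clamped against a float threshold directly:
//
//   float3 c = min(make_uchar3(10, 200, 90), 128.f); // (10.f, 128.f, 90.f)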
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_VECMATH_HPP


@ -0,0 +1,288 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_VEC_TRAITS_HPP
#define OPENCV_CUDA_VEC_TRAITS_HPP
#include "common.hpp"
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template<typename T, int N> struct TypeVec;
struct __align__(8) uchar8
{
uchar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uchar8 make_uchar8(uchar a0, uchar a1, uchar a2, uchar a3, uchar a4, uchar a5, uchar a6, uchar a7)
{
uchar8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(8) char8
{
schar a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ char8 make_char8(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
{
char8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) ushort8
{
ushort a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ ushort8 make_ushort8(ushort a0, ushort a1, ushort a2, ushort a3, ushort a4, ushort a5, ushort a6, ushort a7)
{
ushort8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(16) short8
{
short a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ short8 make_short8(short a0, short a1, short a2, short a3, short a4, short a5, short a6, short a7)
{
short8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) uint8
{
uint a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ uint8 make_uint8(uint a0, uint a1, uint a2, uint a3, uint a4, uint a5, uint a6, uint a7)
{
uint8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) int8
{
int a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ int8 make_int8(int a0, int a1, int a2, int a3, int a4, int a5, int a6, int a7)
{
int8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct __align__(32) float8
{
float a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ float8 make_float8(float a0, float a1, float a2, float a3, float a4, float a5, float a6, float a7)
{
float8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
struct double8
{
double a0, a1, a2, a3, a4, a5, a6, a7;
};
static __host__ __device__ __forceinline__ double8 make_double8(double a0, double a1, double a2, double a3, double a4, double a5, double a6, double a7)
{
double8 val = {a0, a1, a2, a3, a4, a5, a6, a7};
return val;
}
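// Usage sketch (illustrative): the 8-element structs mirror CUDA's built-in vector
// types and come with matching make_* helpers; the __align__ qualifiers enable
// vectorized loads, and double8 deliberately carries no alignment qualifier:
//
//   float8 f = make_float8(0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f);
//   // f.a0 == 0.f ... f.a7 == 7.f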
#define OPENCV_CUDA_IMPLEMENT_TYPE_VEC(type) \
template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; }; \
template<> struct TypeVec<type, 8> { typedef type ## 8 vec_type; }; \
template<> struct TypeVec<type ## 8, 8> { typedef type ## 8 vec_type; };
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uchar)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(char)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(ushort)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(short)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(int)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(uint)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(float)
OPENCV_CUDA_IMPLEMENT_TYPE_VEC(double)
#undef OPENCV_CUDA_IMPLEMENT_TYPE_VEC
template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };
template<> struct TypeVec<schar, 8> { typedef char8 vec_type; };
template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };
template<> struct TypeVec<bool, 8> { typedef uchar8 vec_type; };
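// Usage sketch (illustrative): TypeVec maps an element type plus a channel count to
// the matching vector type; vector-type inputs map to themselves, and bool is
// stored as uchar, per the specializations above:
//
//   typedef TypeVec<uchar, 3>::vec_type V1;  // uchar3
//   typedef TypeVec<uchar3, 3>::vec_type V2; // uchar3 as well
//   typedef TypeVec<bool, 4>::vec_type  V3;  // uchar4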
template<typename T> struct VecTraits;
#define OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(type) \
template<> struct VecTraits<type> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type all(type v) {return v;} \
static __device__ __host__ __forceinline__ type make(type x) {return x;} \
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
}; \
template<> struct VecTraits<type ## 1> \
{ \
typedef type elem_type; \
enum {cn=1}; \
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
}; \
template<> struct VecTraits<type ## 2> \
{ \
typedef type elem_type; \
enum {cn=2}; \
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
}; \
template<> struct VecTraits<type ## 3> \
{ \
typedef type elem_type; \
enum {cn=3}; \
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
}; \
template<> struct VecTraits<type ## 4> \
{ \
typedef type elem_type; \
enum {cn=4}; \
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
}; \
template<> struct VecTraits<type ## 8> \
{ \
typedef type elem_type; \
enum {cn=8}; \
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
};
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(short)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(int)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(uint)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(float)
OPENCV_CUDA_IMPLEMENT_VEC_TRAITS(double)
#undef OPENCV_CUDA_IMPLEMENT_VEC_TRAITS
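// Usage sketch (illustrative): VecTraits gives generic code a uniform way to query
// the channel count and construct values, for plain scalars and vector types alike:
//
//   float3 ones = VecTraits<float3>::all(1.f);  // (1.f, 1.f, 1.f)
//   int    cn   = VecTraits<float3>::cn;        // 3
//   float  s    = VecTraits<float>::make(2.f);  // scalar specialization: returns 2.f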
template<> struct VecTraits<char>
{
typedef char elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char all(char v) {return v;}
static __device__ __host__ __forceinline__ char make(char x) {return x;}
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
};
template<> struct VecTraits<schar>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
};
template<> struct VecTraits<char1>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
};
template<> struct VecTraits<char2>
{
typedef schar elem_type;
enum {cn=2};
static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
};
template<> struct VecTraits<char3>
{
typedef schar elem_type;
enum {cn=3};
static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
};
template<> struct VecTraits<char4>
{
typedef schar elem_type;
enum {cn=4};
static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
};
template<> struct VecTraits<char8>
{
typedef schar elem_type;
enum {cn=8};
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
};
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif // OPENCV_CUDA_VEC_TRAITS_HPP

View File

@ -0,0 +1,139 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_DEVICE_WARP_HPP
#define OPENCV_CUDA_DEVICE_WARP_HPP
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
struct Warp
{
enum
{
LOG_WARP_SIZE = 5,
WARP_SIZE = 1 << LOG_WARP_SIZE,
STRIDE = WARP_SIZE
};
/** \brief Returns the warp lane ID of the calling thread. */
static __device__ __forceinline__ unsigned int laneId()
{
unsigned int ret;
asm("mov.u32 %0, %%laneid;" : "=r"(ret) );
return ret;
}
template<typename It, typename T>
static __device__ __forceinline__ void fill(It beg, It end, const T& value)
{
for(It t = beg + laneId(); t < end; t += STRIDE)
*t = value;
}
template<typename InIt, typename OutIt>
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = *t;
return out;
}
template<typename InIt, typename OutIt, class UnOp>
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
{
for(InIt t = beg + laneId(); t < end; t += STRIDE, out += STRIDE)
*out = op(*t);
return out;
}
template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
{
unsigned int lane = laneId();
InIt1 t1 = beg1 + lane;
InIt2 t2 = beg2 + lane;
for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, out += STRIDE)
*out = op(*t1, *t2);
return out;
}
template <class T, class BinOp>
static __device__ __forceinline__ T reduce(volatile T *ptr, BinOp op)
{
const unsigned int lane = laneId();
if (lane < 16)
{
T partial = ptr[lane];
ptr[lane] = partial = op(partial, ptr[lane + 16]);
ptr[lane] = partial = op(partial, ptr[lane + 8]);
ptr[lane] = partial = op(partial, ptr[lane + 4]);
ptr[lane] = partial = op(partial, ptr[lane + 2]);
ptr[lane] = partial = op(partial, ptr[lane + 1]);
}
return *ptr;
}
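/** \brief Warp-strided iota ("yota" is the upstream spelling): fills
    [beg, end) with value, value + 1, value + 2, ..., one stripe per lane. */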
template<typename OutIt, typename T>
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
{
unsigned int lane = laneId();
value += lane;
for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
*t = value;
}
};
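// Editor's note: a hedged usage sketch, not part of the upstream header;
// "PlusExample" and "warp_sum_example" are hypothetical names. One warp
// cooperatively sums 32 floats staged in shared memory via Warp::reduce.
struct PlusExample
{
__device__ __forceinline__ float operator()(float a, float b) const { return a + b; }
};
static __device__ __forceinline__ float warp_sum_example(volatile float* smem32)
{
// Lanes 0..15 fold the upper half in at each step; the final total is
// left in smem32[0] and returned.
return Warp::reduce(smem32, PlusExample());
}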
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_DEVICE_WARP_HPP */

View File

@ -0,0 +1,76 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_WARP_REDUCE_HPP__
#define OPENCV_CUDA_WARP_REDUCE_HPP__
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
template <class T>
__device__ __forceinline__ T warp_reduce(volatile T *ptr , const unsigned int tid = threadIdx.x)
{
const unsigned int lane = tid & 31; // index of thread in warp (0..31)
if (lane < 16)
{
T partial = ptr[tid];
ptr[tid] = partial = partial + ptr[tid + 16];
ptr[tid] = partial = partial + ptr[tid + 8];
ptr[tid] = partial = partial + ptr[tid + 4];
ptr[tid] = partial = partial + ptr[tid + 2];
ptr[tid] = partial = partial + ptr[tid + 1];
}
return ptr[tid - lane];
}
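// Editor's note: a hedged call-pattern sketch, not part of the upstream
// header ("reduce_example_kernel" is hypothetical). Each thread stages a
// partial in shared memory; warp_reduce() then collapses every 32-element
// segment. Like the tree above, this relies on pre-Volta warp-synchronous
// execution.
//
//   __global__ void reduce_example_kernel(const float* in, float* out)
//   {
//       __shared__ volatile float smem[256];
//       const unsigned int tid = threadIdx.x;
//       smem[tid] = in[blockIdx.x * blockDim.x + tid];
//       const float warp_total = warp_reduce(smem, tid);
//       if ((tid & 31u) == 0)  // one writer per warp
//           out[(blockIdx.x * blockDim.x + tid) / 32] = warp_total;
//   }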
}}} // namespace cv { namespace cuda { namespace device
//! @endcond
#endif /* OPENCV_CUDA_WARP_REDUCE_HPP__ */

View File

@ -0,0 +1,162 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CUDA_WARP_SHUFFLE_HPP
#define OPENCV_CUDA_WARP_SHUFFLE_HPP
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
namespace cv { namespace cuda { namespace device
{
#if __CUDACC_VER_MAJOR__ >= 9
# define __shfl(x, y, z) __shfl_sync(0xFFFFFFFFU, x, y, z)
# define __shfl_up(x, y, z) __shfl_up_sync(0xFFFFFFFFU, x, y, z)
# define __shfl_down(x, y, z) __shfl_down_sync(0xFFFFFFFFU, x, y, z)
#endif
template <typename T>
__device__ __forceinline__ T shfl(T val, int srcLane, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return __shfl(val, srcLane, width);
#else
return T();
#endif
}
__device__ __forceinline__ unsigned int shfl(unsigned int val, int srcLane, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return (unsigned int) __shfl((int) val, srcLane, width);
#else
return 0;
#endif
}
__device__ __forceinline__ double shfl(double val, int srcLane, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl(lo, srcLane, width);
hi = __shfl(hi, srcLane, width);
return __hiloint2double(hi, lo);
#else
return 0.0;
#endif
}
template <typename T>
__device__ __forceinline__ T shfl_down(T val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return __shfl_down(val, delta, width);
#else
return T();
#endif
}
__device__ __forceinline__ unsigned int shfl_down(unsigned int val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return (unsigned int) __shfl_down((int) val, delta, width);
#else
return 0;
#endif
}
__device__ __forceinline__ double shfl_down(double val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl_down(lo, delta, width);
hi = __shfl_down(hi, delta, width);
return __hiloint2double(hi, lo);
#else
return 0.0;
#endif
}
template <typename T>
__device__ __forceinline__ T shfl_up(T val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return __shfl_up(val, delta, width);
#else
return T();
#endif
}
__device__ __forceinline__ unsigned int shfl_up(unsigned int val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
return (unsigned int) __shfl_up((int) val, delta, width);
#else
return 0;
#endif
}
__device__ __forceinline__ double shfl_up(double val, unsigned int delta, int width = warpSize)
{
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300
int lo = __double2loint(val);
int hi = __double2hiint(val);
lo = __shfl_up(lo, delta, width);
hi = __shfl_up(hi, delta, width);
return __hiloint2double(hi, lo);
#else
return 0.0;
#endif
}
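// Editor's note: a hedged sketch, not part of the upstream header;
// "warp_sum_shfl_example" is a hypothetical name. The classic use of
// shfl_down() is a register-only warp sum: five halving steps, no shared
// memory, and lane 0 ends up holding the total.
static __device__ __forceinline__ float warp_sum_shfl_example(float val)
{
for (int offset = 16; offset > 0; offset >>= 1)
val += shfl_down(val, (unsigned int)offset);
return val; // meaningful on lane 0
}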
}}}
# undef __shfl
# undef __shfl_up
# undef __shfl_down
//! @endcond
#endif // OPENCV_CUDA_WARP_SHUFFLE_HPP

View File

@ -0,0 +1,86 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
#define OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP
#ifndef __cplusplus
# error cuda_stream_accessor.hpp header must be compiled as C++
#endif
/** @file cuda_stream_accessor.hpp
* This is the only header file that depends on the CUDA Runtime API; all other headers are independent of it.
*/
#include <cuda_runtime.h>
#include "opencv2/core/cuda.hpp"
namespace cv
{
namespace cuda
{
//! @addtogroup cudacore_struct
//! @{
/** @brief Class that enables getting cudaStream_t from cuda::Stream
*/
struct StreamAccessor
{
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
CV_EXPORTS static Stream wrapStream(cudaStream_t stream);
};
/** @brief Class that enables getting cudaEvent_t from cuda::Event
*/
struct EventAccessor
{
CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
CV_EXPORTS static Event wrapEvent(cudaEvent_t event);
};
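/* Editor's note: a hedged interop sketch, not part of the upstream header.
 * Typical pattern for mixing OpenCV streams with raw CUDA Runtime calls
 * ("my_kernel" is a hypothetical kernel):
 *
 *   cv::cuda::Stream cvStream;
 *   cudaStream_t raw = cv::cuda::StreamAccessor::getStream(cvStream);
 *   my_kernel<<<grid, block, 0, raw>>>(args);
 *   cv::cuda::Stream wrapped = cv::cuda::StreamAccessor::wrapStream(raw);
 */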
//! @}
}
}
#endif /* OPENCV_CORE_CUDA_STREAM_ACCESSOR_HPP */

View File

@ -0,0 +1,135 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CUDA_TYPES_HPP
#define OPENCV_CORE_CUDA_TYPES_HPP
#ifndef __cplusplus
# error cuda_types.hpp header must be compiled as C++
#endif
/** @file
* @deprecated Use @ref cudev instead.
*/
//! @cond IGNORED
#ifdef __CUDACC__
#define __CV_CUDA_HOST_DEVICE__ __host__ __device__ __forceinline__
#else
#define __CV_CUDA_HOST_DEVICE__
#endif
namespace cv
{
namespace cuda
{
// Simple lightweight structures that encapsulate information about an image on the device.
// They are intended to be passed to nvcc-compiled code, because GpuMat itself depends on headers that nvcc can't compile
template <typename T> struct DevPtr
{
typedef T elem_type;
typedef int index_type;
enum { elem_size = sizeof(elem_type) };
T* data;
__CV_CUDA_HOST_DEVICE__ DevPtr() : data(0) {}
__CV_CUDA_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
__CV_CUDA_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
__CV_CUDA_HOST_DEVICE__ operator T*() { return data; }
__CV_CUDA_HOST_DEVICE__ operator const T*() const { return data; }
};
template <typename T> struct PtrSz : public DevPtr<T>
{
__CV_CUDA_HOST_DEVICE__ PtrSz() : size(0) {}
__CV_CUDA_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
size_t size;
};
template <typename T> struct PtrStep : public DevPtr<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStep() : step(0) {}
__CV_CUDA_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
size_t step;
__CV_CUDA_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
__CV_CUDA_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
__CV_CUDA_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_CUDA_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
template <typename T> struct PtrStepSz : public PtrStep<T>
{
__CV_CUDA_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
__CV_CUDA_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
template <typename U>
explicit PtrStepSz(const PtrStepSz<U>& d) : PtrStep<T>((T*)d.data, d.step), cols(d.cols), rows(d.rows){}
int cols;
int rows;
};
typedef PtrStepSz<unsigned char> PtrStepSzb;
typedef PtrStepSz<float> PtrStepSzf;
typedef PtrStepSz<int> PtrStepSzi;
typedef PtrStep<unsigned char> PtrStepb;
typedef PtrStep<float> PtrStepf;
typedef PtrStep<int> PtrStepi;
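// Editor's note: a hedged usage sketch, not part of the upstream header;
// "sample_pixel_example" is a hypothetical name. PtrStepSz is what a GpuMat
// decays to at a kernel boundary: raw pointer, row stride in bytes, and
// dimensions; operator()(y, x) applies the stride.
static __CV_CUDA_HOST_DEVICE__ unsigned char sample_pixel_example(const PtrStepSzb& img, int y, int x)
{
// Bounds-checked fetch; returns 0 outside the image.
return (x >= 0 && x < img.cols && y >= 0 && y < img.rows) ? img(y, x) : 0;
}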
}
}
//! @endcond
#endif /* OPENCV_CORE_CUDA_TYPES_HPP */

View File

@ -0,0 +1,239 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#if defined __OPENCV_BUILD
#include "cv_cpu_config.h"
#include "cv_cpu_helper.h"
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#endif
#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...) /* done */
#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
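// Editor's note: a hedged consumption sketch, not part of the upstream
// header. A wrapper typically forwards through the dispatch chain; the
// function names below are hypothetical, while CV_CPU_DISPATCH_MODES_ALL
// comes from the generated cv_cpu_config.h (e.g. "AVX2, SSE4_1, BASELINE"):
//
//   void resize(const uchar* src, uchar* dst, int n)
//   {
//       CV_CPU_DISPATCH(resize_impl, (src, dst, n), CV_CPU_DISPATCH_MODES_ALL);
//   }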
#if defined CV_ENABLE_INTRINSICS \
&& !defined CV_DISABLE_OPTIMIZATION \
&& !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
#ifdef CV_CPU_COMPILE_SSE2
# include <emmintrin.h>
# define CV_MMX 1
# define CV_SSE 1
# define CV_SSE2 1
#endif
#ifdef CV_CPU_COMPILE_SSE3
# include <pmmintrin.h>
# define CV_SSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSSE3
# include <tmmintrin.h>
# define CV_SSSE3 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_1
# include <smmintrin.h>
# define CV_SSE4_1 1
#endif
#ifdef CV_CPU_COMPILE_SSE4_2
# include <nmmintrin.h>
# define CV_SSE4_2 1
#endif
#ifdef CV_CPU_COMPILE_POPCNT
# ifdef _MSC_VER
# include <nmmintrin.h>
# if defined(_M_X64)
# define CV_POPCNT_U64 _mm_popcnt_u64
# endif
# define CV_POPCNT_U32 _mm_popcnt_u32
# else
# include <popcntintrin.h>
# if defined(__x86_64__)
# define CV_POPCNT_U64 __builtin_popcountll
# endif
# define CV_POPCNT_U32 __builtin_popcount
# endif
# define CV_POPCNT 1
#endif
#ifdef CV_CPU_COMPILE_AVX
# include <immintrin.h>
# define CV_AVX 1
#endif
#ifdef CV_CPU_COMPILE_FP16
# if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
# include <arm_neon.h>
# else
# include <immintrin.h>
# endif
# define CV_FP16 1
#endif
#ifdef CV_CPU_COMPILE_AVX2
# include <immintrin.h>
# define CV_AVX2 1
#endif
#ifdef CV_CPU_COMPILE_AVX_512F
# include <immintrin.h>
# define CV_AVX_512F 1
#endif
#ifdef CV_CPU_COMPILE_AVX512_SKX
# include <immintrin.h>
# define CV_AVX512_SKX 1
#endif
#ifdef CV_CPU_COMPILE_FMA3
# define CV_FMA3 1
#endif
#if defined _WIN32 && defined(_M_ARM)
# include <Intrin.h>
# include <arm_neon.h>
# define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
# include <arm_neon.h>
# define CV_NEON 1
#endif
#if defined(__ARM_NEON__) || defined(__aarch64__)
# include <arm_neon.h>
#endif
#if defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
# include <altivec.h>
# undef vector
# undef pixel
# undef bool
# define CV_VSX 1
#endif
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
#if defined CV_CPU_COMPILE_AVX && !defined CV_CPU_BASELINE_COMPILE_AVX
struct VZeroUpperGuard {
#ifdef __GNUC__
__attribute__((always_inline))
#endif
inline ~VZeroUpperGuard() { _mm256_zeroupper(); }
};
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard; (void)__vzeroupper_guard;
#endif
#ifdef __CV_AVX_GUARD
#define CV_AVX_GUARD __CV_AVX_GUARD
#else
#define CV_AVX_GUARD
#endif
#endif // __OPENCV_BUILD
#if !defined __OPENCV_BUILD /* Compatibility code */ \
&& !defined __CUDACC__ /* do not include SSE/AVX/NEON headers for NVCC compiler */
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
# include <emmintrin.h>
# define CV_MMX 1
# define CV_SSE 1
# define CV_SSE2 1
#elif defined _WIN32 && defined(_M_ARM)
# include <Intrin.h>
# include <arm_neon.h>
# define CV_NEON 1
#elif defined(__ARM_NEON__) || (defined (__ARM_NEON) && defined(__aarch64__))
# include <arm_neon.h>
# define CV_NEON 1
#elif defined(__VSX__) && defined(__PPC64__) && defined(__LITTLE_ENDIAN__)
# include <altivec.h>
# undef vector
# undef pixel
# undef bool
# define CV_VSX 1
#endif
#endif // !__OPENCV_BUILD && !__CUDACC (Compatibility code)
#ifndef CV_MMX
# define CV_MMX 0
#endif
#ifndef CV_SSE
# define CV_SSE 0
#endif
#ifndef CV_SSE2
# define CV_SSE2 0
#endif
#ifndef CV_SSE3
# define CV_SSE3 0
#endif
#ifndef CV_SSSE3
# define CV_SSSE3 0
#endif
#ifndef CV_SSE4_1
# define CV_SSE4_1 0
#endif
#ifndef CV_SSE4_2
# define CV_SSE4_2 0
#endif
#ifndef CV_POPCNT
# define CV_POPCNT 0
#endif
#ifndef CV_AVX
# define CV_AVX 0
#endif
#ifndef CV_FP16
# define CV_FP16 0
#endif
#ifndef CV_AVX2
# define CV_AVX2 0
#endif
#ifndef CV_FMA3
# define CV_FMA3 0
#endif
#ifndef CV_AVX_512F
# define CV_AVX_512F 0
#endif
#ifndef CV_AVX_512BW
# define CV_AVX_512BW 0
#endif
#ifndef CV_AVX_512CD
# define CV_AVX_512CD 0
#endif
#ifndef CV_AVX_512DQ
# define CV_AVX_512DQ 0
#endif
#ifndef CV_AVX_512ER
# define CV_AVX_512ER 0
#endif
#ifndef CV_AVX_512IFMA512
# define CV_AVX_512IFMA512 0
#endif
#ifndef CV_AVX_512PF
# define CV_AVX_512PF 0
#endif
#ifndef CV_AVX_512VBMI
# define CV_AVX_512VBMI 0
#endif
#ifndef CV_AVX_512VL
# define CV_AVX_512VL 0
#endif
#ifndef CV_AVX512_SKX
# define CV_AVX512_SKX 0
#endif
#ifndef CV_NEON
# define CV_NEON 0
#endif
#ifndef CV_VSX
# define CV_VSX 0
#endif

View File

@ -0,0 +1,274 @@
// AUTOGENERATED, DO NOT EDIT
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
# define CV_TRY_SSE 1
# define CV_CPU_HAS_SUPPORT_SSE 1
# define CV_CPU_CALL_SSE(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE_(fn, args) return (opt_SSE::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
# define CV_TRY_SSE 1
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
# define CV_CPU_CALL_SSE(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
# define CV_CPU_CALL_SSE_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
#else
# define CV_TRY_SSE 0
# define CV_CPU_HAS_SUPPORT_SSE 0
# define CV_CPU_CALL_SSE(fn, args)
# define CV_CPU_CALL_SSE_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
# define CV_TRY_SSE2 1
# define CV_CPU_HAS_SUPPORT_SSE2 1
# define CV_CPU_CALL_SSE2(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE2_(fn, args) return (opt_SSE2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
# define CV_TRY_SSE2 1
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
# define CV_CPU_CALL_SSE2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
# define CV_CPU_CALL_SSE2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
#else
# define CV_TRY_SSE2 0
# define CV_CPU_HAS_SUPPORT_SSE2 0
# define CV_CPU_CALL_SSE2(fn, args)
# define CV_CPU_CALL_SSE2_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
# define CV_TRY_SSE3 1
# define CV_CPU_HAS_SUPPORT_SSE3 1
# define CV_CPU_CALL_SSE3(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE3_(fn, args) return (opt_SSE3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
# define CV_TRY_SSE3 1
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
# define CV_CPU_CALL_SSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
# define CV_CPU_CALL_SSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
#else
# define CV_TRY_SSE3 0
# define CV_CPU_HAS_SUPPORT_SSE3 0
# define CV_CPU_CALL_SSE3(fn, args)
# define CV_CPU_CALL_SSE3_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
# define CV_TRY_SSSE3 1
# define CV_CPU_HAS_SUPPORT_SSSE3 1
# define CV_CPU_CALL_SSSE3(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSSE3_(fn, args) return (opt_SSSE3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
# define CV_TRY_SSSE3 1
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
# define CV_CPU_CALL_SSSE3(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
# define CV_CPU_CALL_SSSE3_(fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
#else
# define CV_TRY_SSSE3 0
# define CV_CPU_HAS_SUPPORT_SSSE3 0
# define CV_CPU_CALL_SSSE3(fn, args)
# define CV_CPU_CALL_SSSE3_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
# define CV_TRY_SSE4_1 1
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
# define CV_CPU_CALL_SSE4_1(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE4_1_(fn, args) return (opt_SSE4_1::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
# define CV_TRY_SSE4_1 1
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
# define CV_CPU_CALL_SSE4_1(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
# define CV_CPU_CALL_SSE4_1_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
#else
# define CV_TRY_SSE4_1 0
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
# define CV_CPU_CALL_SSE4_1(fn, args)
# define CV_CPU_CALL_SSE4_1_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
# define CV_TRY_SSE4_2 1
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
# define CV_CPU_CALL_SSE4_2(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_SSE4_2_(fn, args) return (opt_SSE4_2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
# define CV_TRY_SSE4_2 1
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
# define CV_CPU_CALL_SSE4_2(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
# define CV_CPU_CALL_SSE4_2_(fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
#else
# define CV_TRY_SSE4_2 0
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
# define CV_CPU_CALL_SSE4_2(fn, args)
# define CV_CPU_CALL_SSE4_2_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
# define CV_TRY_POPCNT 1
# define CV_CPU_HAS_SUPPORT_POPCNT 1
# define CV_CPU_CALL_POPCNT(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_POPCNT_(fn, args) return (opt_POPCNT::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
# define CV_TRY_POPCNT 1
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
# define CV_CPU_CALL_POPCNT(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
# define CV_CPU_CALL_POPCNT_(fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
#else
# define CV_TRY_POPCNT 0
# define CV_CPU_HAS_SUPPORT_POPCNT 0
# define CV_CPU_CALL_POPCNT(fn, args)
# define CV_CPU_CALL_POPCNT_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
# define CV_TRY_AVX 1
# define CV_CPU_HAS_SUPPORT_AVX 1
# define CV_CPU_CALL_AVX(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX_(fn, args) return (opt_AVX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
# define CV_TRY_AVX 1
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
# define CV_CPU_CALL_AVX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
# define CV_CPU_CALL_AVX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
#else
# define CV_TRY_AVX 0
# define CV_CPU_HAS_SUPPORT_AVX 0
# define CV_CPU_CALL_AVX(fn, args)
# define CV_CPU_CALL_AVX_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
# define CV_TRY_FP16 1
# define CV_CPU_HAS_SUPPORT_FP16 1
# define CV_CPU_CALL_FP16(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_FP16_(fn, args) return (opt_FP16::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
# define CV_TRY_FP16 1
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
# define CV_CPU_CALL_FP16(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
# define CV_CPU_CALL_FP16_(fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
#else
# define CV_TRY_FP16 0
# define CV_CPU_HAS_SUPPORT_FP16 0
# define CV_CPU_CALL_FP16(fn, args)
# define CV_CPU_CALL_FP16_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
# define CV_TRY_AVX2 1
# define CV_CPU_HAS_SUPPORT_AVX2 1
# define CV_CPU_CALL_AVX2(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX2_(fn, args) return (opt_AVX2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
# define CV_TRY_AVX2 1
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
# define CV_CPU_CALL_AVX2(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
# define CV_CPU_CALL_AVX2_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
#else
# define CV_TRY_AVX2 0
# define CV_CPU_HAS_SUPPORT_AVX2 0
# define CV_CPU_CALL_AVX2(fn, args)
# define CV_CPU_CALL_AVX2_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
# define CV_TRY_FMA3 1
# define CV_CPU_HAS_SUPPORT_FMA3 1
# define CV_CPU_CALL_FMA3(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_FMA3_(fn, args) return (opt_FMA3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
# define CV_TRY_FMA3 1
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
# define CV_CPU_CALL_FMA3(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
# define CV_CPU_CALL_FMA3_(fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
#else
# define CV_TRY_FMA3 0
# define CV_CPU_HAS_SUPPORT_FMA3 0
# define CV_CPU_CALL_FMA3(fn, args)
# define CV_CPU_CALL_FMA3_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX_512F
# define CV_TRY_AVX_512F 1
# define CV_CPU_HAS_SUPPORT_AVX_512F 1
# define CV_CPU_CALL_AVX_512F(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX_512F_(fn, args) return (opt_AVX_512F::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX_512F
# define CV_TRY_AVX_512F 1
# define CV_CPU_HAS_SUPPORT_AVX_512F (cv::checkHardwareSupport(CV_CPU_AVX_512F))
# define CV_CPU_CALL_AVX_512F(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
# define CV_CPU_CALL_AVX_512F_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX_512F) return (opt_AVX_512F::fn args)
#else
# define CV_TRY_AVX_512F 0
# define CV_CPU_HAS_SUPPORT_AVX_512F 0
# define CV_CPU_CALL_AVX_512F(fn, args)
# define CV_CPU_CALL_AVX_512F_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX_512F(fn, args, mode, ...) CV_CPU_CALL_AVX_512F(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX512_SKX
# define CV_TRY_AVX512_SKX 1
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 1
# define CV_CPU_CALL_AVX512_SKX(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_AVX512_SKX_(fn, args) return (opt_AVX512_SKX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX512_SKX
# define CV_TRY_AVX512_SKX 1
# define CV_CPU_HAS_SUPPORT_AVX512_SKX (cv::checkHardwareSupport(CV_CPU_AVX512_SKX))
# define CV_CPU_CALL_AVX512_SKX(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
# define CV_CPU_CALL_AVX512_SKX_(fn, args) if (CV_CPU_HAS_SUPPORT_AVX512_SKX) return (opt_AVX512_SKX::fn args)
#else
# define CV_TRY_AVX512_SKX 0
# define CV_CPU_HAS_SUPPORT_AVX512_SKX 0
# define CV_CPU_CALL_AVX512_SKX(fn, args)
# define CV_CPU_CALL_AVX512_SKX_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX512_SKX(fn, args, mode, ...) CV_CPU_CALL_AVX512_SKX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
# define CV_TRY_NEON 1
# define CV_CPU_HAS_SUPPORT_NEON 1
# define CV_CPU_CALL_NEON(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_NEON_(fn, args) return (opt_NEON::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
# define CV_TRY_NEON 1
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
# define CV_CPU_CALL_NEON(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
# define CV_CPU_CALL_NEON_(fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
#else
# define CV_TRY_NEON 0
# define CV_CPU_HAS_SUPPORT_NEON 0
# define CV_CPU_CALL_NEON(fn, args)
# define CV_CPU_CALL_NEON_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_VSX
# define CV_TRY_VSX 1
# define CV_CPU_HAS_SUPPORT_VSX 1
# define CV_CPU_CALL_VSX(fn, args) return (cpu_baseline::fn args)
# define CV_CPU_CALL_VSX_(fn, args) return (opt_VSX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_VSX
# define CV_TRY_VSX 1
# define CV_CPU_HAS_SUPPORT_VSX (cv::checkHardwareSupport(CV_CPU_VSX))
# define CV_CPU_CALL_VSX(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
# define CV_CPU_CALL_VSX_(fn, args) if (CV_CPU_HAS_SUPPORT_VSX) return (opt_VSX::fn args)
#else
# define CV_TRY_VSX 0
# define CV_CPU_HAS_SUPPORT_VSX 0
# define CV_CPU_CALL_VSX(fn, args)
# define CV_CPU_CALL_VSX_(fn, args)
#endif
#define __CV_CPU_DISPATCH_CHAIN_VSX(fn, args, mode, ...) CV_CPU_CALL_VSX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
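// Editor's note: a hedged sketch, not part of this autogenerated file. Each
// CV_CPU_CALL_<OPT> above becomes either an unconditional call (when the
// feature is in the compile baseline) or a runtime-guarded call. A manual
// dispatcher could chain them like this (names hypothetical):
//
//   int norm_l2(const float* a, int n)
//   {
//       CV_CPU_CALL_AVX2(norm_l2_impl, (a, n));   // returns only if AVX2 is usable
//       CV_CPU_CALL_SSE2(norm_l2_impl, (a, n));
//       return cpu_baseline::norm_l2_impl(a, n);  // portable fallback
//   }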

View File

@ -0,0 +1,501 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CVDEF_H
#define OPENCV_CORE_CVDEF_H
//! @addtogroup core_utils
//! @{
#if !defined CV_DOXYGEN && !defined CV_IGNORE_DEBUG_BUILD_GUARD
#if (defined(_MSC_VER) && (defined(DEBUG) || defined(_DEBUG))) || \
(defined(_GLIBCXX_DEBUG) || defined(_GLIBCXX_DEBUG_PEDANTIC))
// Guard to prevent using of binary incompatible binaries / runtimes
// https://github.com/opencv/opencv/pull/9161
#define CV__DEBUG_NS_BEGIN namespace debug_build_guard {
#define CV__DEBUG_NS_END }
namespace cv { namespace debug_build_guard { } using namespace debug_build_guard; }
#endif
#endif
#ifndef CV__DEBUG_NS_BEGIN
#define CV__DEBUG_NS_BEGIN
#define CV__DEBUG_NS_END
#endif
#ifdef __OPENCV_BUILD
#include "cvconfig.h"
#endif
#ifndef __CV_EXPAND
#define __CV_EXPAND(x) x
#endif
#ifndef __CV_CAT
#define __CV_CAT__(x, y) x ## y
#define __CV_CAT_(x, y) __CV_CAT__(x, y)
#define __CV_CAT(x, y) __CV_CAT_(x, y)
#endif
// undef problematic defines sometimes defined by system headers (windows.h in particular)
#undef small
#undef min
#undef max
#undef abs
#undef Complex
#include <limits.h>
#include "opencv2/core/hal/interface.h"
#if defined __ICL
# define CV_ICC __ICL
#elif defined __ICC
# define CV_ICC __ICC
#elif defined __ECL
# define CV_ICC __ECL
#elif defined __ECC
# define CV_ICC __ECC
#elif defined __INTEL_COMPILER
# define CV_ICC __INTEL_COMPILER
#endif
#ifndef CV_INLINE
# if defined __cplusplus
# define CV_INLINE static inline
# elif defined _MSC_VER
# define CV_INLINE __inline
# else
# define CV_INLINE static
# endif
#endif
#if defined CV_DISABLE_OPTIMIZATION || (defined CV_ICC && !defined CV_ENABLE_UNROLLED)
# define CV_ENABLE_UNROLLED 0
#else
# define CV_ENABLE_UNROLLED 1
#endif
#ifdef __GNUC__
# define CV_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
#elif defined _MSC_VER
# define CV_DECL_ALIGNED(x) __declspec(align(x))
#else
# define CV_DECL_ALIGNED(x)
#endif
/* CPU features and intrinsics support */
#define CV_CPU_NONE 0
#define CV_CPU_MMX 1
#define CV_CPU_SSE 2
#define CV_CPU_SSE2 3
#define CV_CPU_SSE3 4
#define CV_CPU_SSSE3 5
#define CV_CPU_SSE4_1 6
#define CV_CPU_SSE4_2 7
#define CV_CPU_POPCNT 8
#define CV_CPU_FP16 9
#define CV_CPU_AVX 10
#define CV_CPU_AVX2 11
#define CV_CPU_FMA3 12
#define CV_CPU_AVX_512F 13
#define CV_CPU_AVX_512BW 14
#define CV_CPU_AVX_512CD 15
#define CV_CPU_AVX_512DQ 16
#define CV_CPU_AVX_512ER 17
#define CV_CPU_AVX_512IFMA512 18 // deprecated
#define CV_CPU_AVX_512IFMA 18
#define CV_CPU_AVX_512PF 19
#define CV_CPU_AVX_512VBMI 20
#define CV_CPU_AVX_512VL 21
#define CV_CPU_NEON 100
#define CV_CPU_VSX 200
// CPU features groups
#define CV_CPU_AVX512_SKX 256
// when adding to this list remember to update the following enum
#define CV_HARDWARE_MAX_FEATURE 512
/** @brief Available CPU features.
*/
enum CpuFeatures {
CPU_MMX = 1,
CPU_SSE = 2,
CPU_SSE2 = 3,
CPU_SSE3 = 4,
CPU_SSSE3 = 5,
CPU_SSE4_1 = 6,
CPU_SSE4_2 = 7,
CPU_POPCNT = 8,
CPU_FP16 = 9,
CPU_AVX = 10,
CPU_AVX2 = 11,
CPU_FMA3 = 12,
CPU_AVX_512F = 13,
CPU_AVX_512BW = 14,
CPU_AVX_512CD = 15,
CPU_AVX_512DQ = 16,
CPU_AVX_512ER = 17,
CPU_AVX_512IFMA512 = 18, // deprecated
CPU_AVX_512IFMA = 18,
CPU_AVX_512PF = 19,
CPU_AVX_512VBMI = 20,
CPU_AVX_512VL = 21,
CPU_NEON = 100,
CPU_VSX = 200,
CPU_AVX512_SKX = 256, //!< Skylake-X with AVX-512F/CD/BW/DQ/VL
CPU_MAX_FEATURE = 512 // see CV_HARDWARE_MAX_FEATURE
};
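// Editor's note: a hedged sketch, not part of the upstream header. The enum
// mirrors the CV_CPU_* defines so runtime feature tests read naturally
// (cv::checkHardwareSupport is declared elsewhere in the core headers):
//
//   if (cv::checkHardwareSupport(CPU_AVX2))
//       run_avx2_path();        // hypothetical optimized branch
//   else
//       run_baseline_path();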
#include "cv_cpu_dispatch.h"
/* fundamental constants */
#define CV_PI 3.1415926535897932384626433832795
#define CV_2PI 6.283185307179586476925286766559
#define CV_LOG2 0.69314718055994530941723212145818
#if defined __ARM_FP16_FORMAT_IEEE \
&& !defined __CUDACC__
# define CV_FP16_TYPE 1
#else
# define CV_FP16_TYPE 0
#endif
typedef union Cv16suf
{
short i;
#if CV_FP16_TYPE
__fp16 h;
#endif
struct _fp16Format
{
unsigned int significand : 10;
unsigned int exponent : 5;
unsigned int sign : 1;
} fmt;
}
Cv16suf;
typedef union Cv32suf
{
int i;
unsigned u;
float f;
struct _fp32Format
{
unsigned int significand : 23;
unsigned int exponent : 8;
unsigned int sign : 1;
} fmt;
}
Cv32suf;
typedef union Cv64suf
{
int64 i;
uint64 u;
double f;
}
Cv64suf;
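// Editor's note: a hedged worked example, not part of the upstream header.
// The Cv*suf unions exist for bit-level type punning, e.g. peeling a float:
//
//   Cv32suf s; s.f = -1.5f;
//   // s.fmt.sign == 1, s.fmt.exponent == 127 (biased),
//   // s.fmt.significand == 0x400000 (fraction 0.5)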
#define OPENCV_ABI_COMPATIBILITY 300
#ifdef __OPENCV_BUILD
# define DISABLE_OPENCV_24_COMPATIBILITY
#endif
#ifdef CVAPI_EXPORTS
# if (defined _WIN32 || defined WINCE || defined __CYGWIN__)
# define CV_EXPORTS __declspec(dllexport)
# elif defined __GNUC__ && __GNUC__ >= 4
# define CV_EXPORTS __attribute__ ((visibility ("default")))
# endif
#endif
#ifndef CV_EXPORTS
# define CV_EXPORTS
#endif
#ifdef _MSC_VER
# define CV_EXPORTS_TEMPLATE
#else
# define CV_EXPORTS_TEMPLATE CV_EXPORTS
#endif
#ifndef CV_DEPRECATED
# if defined(__GNUC__)
# define CV_DEPRECATED __attribute__ ((deprecated))
# elif defined(_MSC_VER)
# define CV_DEPRECATED __declspec(deprecated)
# else
# define CV_DEPRECATED
# endif
#endif
#ifndef CV_EXTERN_C
# ifdef __cplusplus
# define CV_EXTERN_C extern "C"
# else
# define CV_EXTERN_C
# endif
#endif
/* special informative macros for wrapper generators */
#define CV_EXPORTS_W CV_EXPORTS
#define CV_EXPORTS_W_SIMPLE CV_EXPORTS
#define CV_EXPORTS_AS(synonym) CV_EXPORTS
#define CV_EXPORTS_W_MAP CV_EXPORTS
#define CV_IN_OUT
#define CV_OUT
#define CV_PROP
#define CV_PROP_RW
#define CV_WRAP
#define CV_WRAP_AS(synonym)
/****************************************************************************************\
* Matrix type (Mat) *
\****************************************************************************************/
#define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT)
#define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1)
#define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1)
#define CV_MAT_TYPE(flags) ((flags) & CV_MAT_TYPE_MASK)
#define CV_MAT_CONT_FLAG_SHIFT 14
#define CV_MAT_CONT_FLAG (1 << CV_MAT_CONT_FLAG_SHIFT)
#define CV_IS_MAT_CONT(flags) ((flags) & CV_MAT_CONT_FLAG)
#define CV_IS_CONT_MAT CV_IS_MAT_CONT
#define CV_SUBMAT_FLAG_SHIFT 15
#define CV_SUBMAT_FLAG (1 << CV_SUBMAT_FLAG_SHIFT)
#define CV_IS_SUBMAT(flags) ((flags) & CV_SUBMAT_FLAG)
/** Size of each channel item,
0x8442211 = 1000 0100 0100 0010 0010 0001 0001 ~ array of sizeof(arr_type_elem) */
#define CV_ELEM_SIZE1(type) \
((((sizeof(size_t)<<28)|0x8442211) >> CV_MAT_DEPTH(type)*4) & 15)
/** 0x3a50 = 11 10 10 01 01 00 00 ~ array of log2(sizeof(arr_type_elem)) */
#define CV_ELEM_SIZE(type) \
(CV_MAT_CN(type) << ((((sizeof(size_t)/4+1)*16384|0x3a50) >> CV_MAT_DEPTH(type)*2) & 3))
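// Editor's worked example (not part of the upstream header): for
// type = CV_32FC3, CV_MAT_DEPTH(type) is 5 (CV_32F), so CV_ELEM_SIZE1
// shifts the packed nibble table 0x8442211 right by 5*4 = 20 bits and masks
// with 15, giving 4 == sizeof(float). CV_ELEM_SIZE then shifts the channel
// count by the matching 2-bit log2 entry of 0x3a50: 3 << 2 == 12 bytes per pixel.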
#ifndef MIN
# define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
/****************************************************************************************\
* static analysis *
\****************************************************************************************/
// In practice, some macros are not processed correctly (noreturn is not detected).
// We need to use simplified definitions for them.
#ifndef CV_STATIC_ANALYSIS
# if defined(__KLOCWORK__) || defined(__clang_analyzer__) || defined(__COVERITY__)
# define CV_STATIC_ANALYSIS
# endif
#endif
/****************************************************************************************\
* Thread sanitizer *
\****************************************************************************************/
#ifndef CV_THREAD_SANITIZER
# if defined(__has_feature)
# if __has_feature(thread_sanitizer)
# define CV_THREAD_SANITIZER
# endif
# endif
#endif
/****************************************************************************************\
* exchange-add operation for atomic operations on reference counters *
\****************************************************************************************/
#ifdef CV_XADD
// allow the use of a user-defined macro
#elif defined __GNUC__ || defined __clang__
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
# ifdef __ATOMIC_ACQ_REL
# define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
# else
# define CV_XADD(addr, delta) __atomic_fetch_add((_Atomic(int)*)(addr), delta, 4)
# endif
# else
# if defined __ATOMIC_ACQ_REL && !defined __clang__
// version for gcc >= 4.7
# define CV_XADD(addr, delta) (int)__atomic_fetch_add((unsigned*)(addr), (unsigned)(delta), __ATOMIC_ACQ_REL)
# else
# define CV_XADD(addr, delta) (int)__sync_fetch_and_add((unsigned*)(addr), (unsigned)(delta))
# endif
# endif
#elif defined _MSC_VER && !defined RC_INVOKED
# include <intrin.h>
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd((long volatile*)addr, delta)
#else
CV_INLINE int CV_XADD(int* addr, int delta) { int tmp = *addr; *addr += delta; return tmp; }
#endif
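// Editor's note: a hedged usage sketch, not part of the upstream header.
// CV_XADD returns the *previous* value, which is exactly what reference
// counting needs: the thread that observes 1 while decrementing released
// the last reference and may free the buffer.
//
//   if (refcount && CV_XADD(refcount, -1) == 1)
//       deallocate();   // hypothetical cleanup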
/****************************************************************************************\
* CV_NORETURN attribute *
\****************************************************************************************/
#ifndef CV_NORETURN
# if defined(__GNUC__)
# define CV_NORETURN __attribute__((__noreturn__))
# elif defined(_MSC_VER) && (_MSC_VER >= 1300)
# define CV_NORETURN __declspec(noreturn)
# else
# define CV_NORETURN /* nothing by default */
# endif
#endif
/****************************************************************************************\
* C++ 11 *
\****************************************************************************************/
#ifndef CV_CXX11
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1800)
# define CV_CXX11 1
# endif
#else
# if CV_CXX11 == 0
# undef CV_CXX11
# endif
#endif
/****************************************************************************************\
* C++ Move semantics *
\****************************************************************************************/
#ifndef CV_CXX_MOVE_SEMANTICS
# if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(_MSC_VER) && _MSC_VER >= 1600)
# define CV_CXX_MOVE_SEMANTICS 1
# elif defined(__clang__)
# if __has_feature(cxx_rvalue_references)
# define CV_CXX_MOVE_SEMANTICS 1
# endif
# endif
#else
# if CV_CXX_MOVE_SEMANTICS == 0
# undef CV_CXX_MOVE_SEMANTICS
# endif
#endif
/****************************************************************************************\
* C++11 std::array *
\****************************************************************************************/
#ifndef CV_CXX_STD_ARRAY
# if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900/*MSVS 2015*/)
# define CV_CXX_STD_ARRAY 1
# include <array>
# endif
#else
# if CV_CXX_STD_ARRAY == 0
# undef CV_CXX_STD_ARRAY
# endif
#endif
// Integer types portability
#ifdef OPENCV_STDINT_HEADER
#include OPENCV_STDINT_HEADER
#else
#if defined(_MSC_VER) && _MSC_VER < 1600 /* MSVS 2010 */
namespace cv {
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef signed short int16_t;
typedef unsigned short uint16_t;
typedef signed int int32_t;
typedef unsigned int uint32_t;
typedef signed __int64 int64_t;
typedef unsigned __int64 uint64_t;
}
#elif defined(_MSC_VER) || __cplusplus >= 201103L
#include <cstdint>
namespace cv {
using std::int8_t;
using std::uint8_t;
using std::int16_t;
using std::uint16_t;
using std::int32_t;
using std::uint32_t;
using std::int64_t;
using std::uint64_t;
}
#else
#include <stdint.h>
namespace cv {
typedef ::int8_t int8_t;
typedef ::uint8_t uint8_t;
typedef ::int16_t int16_t;
typedef ::uint16_t uint16_t;
typedef ::int32_t int32_t;
typedef ::uint32_t uint32_t;
typedef ::int64_t int64_t;
typedef ::uint64_t uint64_t;
}
#endif
#endif
//! @}
#endif // OPENCV_CORE_CVDEF_H

File diff suppressed because it is too large

View File

@ -0,0 +1,286 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_CVSTDINL_HPP
#define OPENCV_CORE_CVSTDINL_HPP
#include <complex>
#include <ostream>
//! @cond IGNORED
#ifdef _MSC_VER
#pragma warning( push )
#pragma warning( disable: 4127 )
#endif
namespace cv
{
template<typename _Tp> class DataType< std::complex<_Tp> >
{
public:
typedef std::complex<_Tp> value_type;
typedef value_type work_type;
typedef _Tp channel_type;
enum { generic_type = 0,
depth = DataType<channel_type>::depth,
channels = 2,
fmt = DataType<channel_type>::fmt + ((channels - 1) << 8),
type = CV_MAKETYPE(depth, channels) };
typedef Vec<channel_type, channels> vec_type;
};
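/* A minimal usage sketch (illustrative): the specialization maps
   std::complex<_Tp> to a 2-channel type, so a complex buffer can be wrapped
   by cv::Mat without copying:
@code{.cpp}
    // DataType<std::complex<float> >::type == CV_32FC2
    std::vector<std::complex<float> > buf(16);
    cv::Mat m(4, 4, cv::DataType<std::complex<float> >::type, buf.data());
@endcode
*/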
inline
String::String(const std::string& str)
: cstr_(0), len_(0)
{
if (!str.empty())
{
size_t len = str.size();
if (len) memcpy(allocate(len), str.c_str(), len);
}
}
inline
String::String(const std::string& str, size_t pos, size_t len)
: cstr_(0), len_(0)
{
size_t strlen = str.size();
pos = min(pos, strlen);
len = min(strlen - pos, len);
if (!len) return;
memcpy(allocate(len), str.c_str() + pos, len);
}
inline
String& String::operator = (const std::string& str)
{
deallocate();
if (!str.empty())
{
size_t len = str.size();
if (len) memcpy(allocate(len), str.c_str(), len);
}
return *this;
}
inline
String& String::operator += (const std::string& str)
{
*this = *this + str;
return *this;
}
inline
String::operator std::string() const
{
return std::string(cstr_, len_);
}
inline
String operator + (const String& lhs, const std::string& rhs)
{
String s;
size_t rhslen = rhs.size();
s.allocate(lhs.len_ + rhslen);
if (lhs.len_) memcpy(s.cstr_, lhs.cstr_, lhs.len_);
if (rhslen) memcpy(s.cstr_ + lhs.len_, rhs.c_str(), rhslen);
return s;
}
inline
String operator + (const std::string& lhs, const String& rhs)
{
String s;
size_t lhslen = lhs.size();
s.allocate(lhslen + rhs.len_);
if (lhslen) memcpy(s.cstr_, lhs.c_str(), lhslen);
if (rhs.len_) memcpy(s.cstr_ + lhslen, rhs.cstr_, rhs.len_);
return s;
}
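/* A minimal usage sketch (illustrative): the overloads above make cv::String
   and std::string freely interchangeable:
@code{.cpp}
    std::string name = "image";
    cv::String s = name;                // std::string -> cv::String
    s += std::string(".png");           // operator += (const std::string&)
    std::string back = (std::string)s;  // cv::String -> std::string
@endcode
*/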
inline
FileNode::operator std::string() const
{
String value;
read(*this, value, value);
return value;
}
template<> inline
void operator >> (const FileNode& n, std::string& value)
{
read(n, value, std::string());
}
template<> inline
FileStorage& operator << (FileStorage& fs, const std::string& value)
{
return fs << cv::String(value);
}
static inline
std::ostream& operator << (std::ostream& os, const String& str)
{
return os << str.c_str();
}
static inline
std::ostream& operator << (std::ostream& out, Ptr<Formatted> fmtd)
{
fmtd->reset();
for(const char* str = fmtd->next(); str; str = fmtd->next())
out << str;
return out;
}
static inline
std::ostream& operator << (std::ostream& out, const Mat& mtx)
{
return out << Formatter::get()->format(mtx);
}
static inline
std::ostream& operator << (std::ostream& out, const UMat& m)
{
return out << m.getMat(ACCESS_READ);
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Complex<_Tp>& c)
{
return out << "(" << c.re << "," << c.im << ")";
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const std::vector<Point_<_Tp> >& vec)
{
return out << Formatter::get()->format(Mat(vec));
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const std::vector<Point3_<_Tp> >& vec)
{
return out << Formatter::get()->format(Mat(vec));
}
template<typename _Tp, int m, int n> static inline
std::ostream& operator << (std::ostream& out, const Matx<_Tp, m, n>& matx)
{
return out << Formatter::get()->format(Mat(matx));
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Point_<_Tp>& p)
{
out << "[" << p.x << ", " << p.y << "]";
return out;
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Point3_<_Tp>& p)
{
out << "[" << p.x << ", " << p.y << ", " << p.z << "]";
return out;
}
template<typename _Tp, int n> static inline
std::ostream& operator << (std::ostream& out, const Vec<_Tp, n>& vec)
{
out << "[";
if (cv::traits::Depth<_Tp>::value <= CV_32S)
{
for (int i = 0; i < n - 1; ++i) {
out << (int)vec[i] << ", ";
}
out << (int)vec[n-1] << "]";
}
else
{
for (int i = 0; i < n - 1; ++i) {
out << vec[i] << ", ";
}
out << vec[n-1] << "]";
}
return out;
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Size_<_Tp>& size)
{
return out << "[" << size.width << " x " << size.height << "]";
}
template<typename _Tp> static inline
std::ostream& operator << (std::ostream& out, const Rect_<_Tp>& rect)
{
return out << "[" << rect.width << " x " << rect.height << " from (" << rect.x << ", " << rect.y << ")]";
}
static inline std::ostream& operator << (std::ostream& out, const MatSize& msize)
{
int i, dims = msize.p[-1];
for( i = 0; i < dims; i++ )
{
out << msize.p[i];
if( i < dims-1 )
out << " x ";
}
return out;
}
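/* A minimal usage sketch (illustrative): with these overloads, the core types
   stream directly to std::ostream:
@code{.cpp}
    cv::Mat m = cv::Mat::eye(2, 2, CV_64F);
    std::cout << m << std::endl;                   // formatted matrix contents
    std::cout << cv::Point(3, 4) << std::endl;     // prints [3, 4]
    std::cout << cv::Size(640, 480) << std::endl;  // prints [640 x 480]
    std::cout << m.size << std::endl;              // MatSize: prints 2 x 2
@endcode
*/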
} // cv
#ifdef _MSC_VER
#pragma warning( pop )
#endif
//! @endcond
#endif // OPENCV_CORE_CVSTDINL_HPP

View File

@ -0,0 +1,184 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_DIRECTX_HPP
#define OPENCV_CORE_DIRECTX_HPP
#include "mat.hpp"
#include "ocl.hpp"
#if !defined(__d3d11_h__)
struct ID3D11Device;
struct ID3D11Texture2D;
#endif
#if !defined(__d3d10_h__)
struct ID3D10Device;
struct ID3D10Texture2D;
#endif
#if !defined(_D3D9_H_)
struct IDirect3DDevice9;
struct IDirect3DDevice9Ex;
struct IDirect3DSurface9;
#endif
namespace cv { namespace directx {
namespace ocl {
using namespace cv::ocl;
//! @addtogroup core_directx
// This section describes OpenCL and DirectX interoperability.
//
// To enable DirectX support, configure OpenCV using CMake with WITH_DIRECTX=ON . Note, DirectX is
// supported only on Windows.
//
// To use OpenCL functionality you should first initialize OpenCL context from DirectX resource.
//
//! @{
// TODO static functions in the Context class
//! @brief Creates OpenCL context from D3D11 device
//
//! @param pD3D11Device - pointer to D3D11 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
//! @brief Creates OpenCL context from D3D10 device
//
//! @param pD3D10Device - pointer to D3D10 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
//! @brief Creates OpenCL context from Direct3DDevice9Ex device
//
//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
//! @brief Creates OpenCL context from Direct3DDevice9 device
//
//! @param pDirect3DDevice9 - pointer to Direct3DDevice9 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
//! @}
} // namespace cv::directx::ocl
//! @addtogroup core_directx
//! @{
//! @brief Converts InputArray to ID3D11Texture2D. If destination texture format is DXGI_FORMAT_NV12 then
//! input UMat is expected to be in BGR format and data will be downsampled and color-converted to NV12.
//
//! @note Note: Destination texture must be allocated by application. Function does memory copy from src to
//! pD3D11Texture2D
//
//! @param src - source InputArray
//! @param pD3D11Texture2D - destination D3D11 texture
CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
//! @brief Converts ID3D11Texture2D to OutputArray. If input texture format is DXGI_FORMAT_NV12 then
//! data will be upsampled and color-converted to BGR format.
//
//! @note Note: Destination matrix will be re-allocated if it does not have enough memory to match texture size.
//! function does memory copy from pD3D11Texture2D to dst
//
//! @param pD3D11Texture2D - source D3D11 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
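//! A minimal usage sketch (illustrative; pD3D11Device, pTexture, input and
//! output are assumptions): a typical interop round trip on Windows built
//! with WITH_DIRECTX=ON:
//! @code{.cpp}
//!     cv::directx::ocl::initializeContextFromD3D11Device(pD3D11Device);
//!     cv::directx::convertToD3D11Texture2D(input, pTexture);    // upload
//!     cv::directx::convertFromD3D11Texture2D(pTexture, output); // download
//! @endcode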
//! @brief Converts InputArray to ID3D10Texture2D
//
//! @note Note: function does memory copy from src to
//! pD3D10Texture2D
//
//! @param src - source InputArray
//! @param pD3D10Texture2D - destination D3D10 texture
CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
//! @brief Converts ID3D10Texture2D to OutputArray
//
//! @note Note: function does memory copy from pD3D10Texture2D
//! to dst
//
//! @param pD3D10Texture2D - source D3D10 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
//! @brief Converts InputArray to IDirect3DSurface9
//
//! @note Note: function does memory copy from src to
//! pDirect3DSurface9
//
//! @param src - source InputArray
//! @param pDirect3DSurface9 - destination D3D9 surface
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
//! @brief Converts IDirect3DSurface9 to OutputArray
//
//! @note Note: function does memory copy from pDirect3DSurface9
//! to dst
//
//! @param pDirect3DSurface9 - source D3D9 surface
//! @param dst - destination OutputArray
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
//! @brief Get OpenCV type from DirectX type
//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
//! @brief Get OpenCV type from DirectX type
//! @param iD3DFORMAT - enum D3DTYPE for D3D9
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
//! @}
} } // namespace cv::directx
#endif // OPENCV_CORE_DIRECTX_HPP

View File

@ -0,0 +1,280 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_EIGEN_HPP
#define OPENCV_CORE_EIGEN_HPP
#include "opencv2/core.hpp"
#if defined _MSC_VER && _MSC_VER >= 1200
#pragma warning( disable: 4714 ) //__forceinline is not inlined
#pragma warning( disable: 4127 ) //conditional expression is constant
#pragma warning( disable: 4244 ) //conversion from '__int64' to 'int', possible loss of data
#endif
namespace cv
{
//! @addtogroup core_eigen
//! @{
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src, Mat& dst )
{
if( !(src.Flags & Eigen::RowMajorBit) )
{
Mat _src(src.cols(), src.rows(), traits::Type<_Tp>::value,
(void*)src.data(), src.outerStride()*sizeof(_Tp));
transpose(_src, dst);
}
else
{
Mat _src(src.rows(), src.cols(), traits::Type<_Tp>::value,
(void*)src.data(), src.outerStride()*sizeof(_Tp));
_src.copyTo(dst);
}
}
// Matx case
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void eigen2cv( const Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& src,
Matx<_Tp, _rows, _cols>& dst )
{
if( !(src.Flags & Eigen::RowMajorBit) )
{
dst = Matx<_Tp, _cols, _rows>(static_cast<const _Tp*>(src.data())).t();
}
else
{
dst = Matx<_Tp, _rows, _cols>(static_cast<const _Tp*>(src.data()));
}
}
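/* A minimal usage sketch (illustrative): a round trip between Eigen and
   OpenCV; Eigen's default column-major layout is handled by the transpose
   branch above:
@code{.cpp}
    Eigen::Matrix3f e = Eigen::Matrix3f::Identity();
    cv::Mat m;
    cv::eigen2cv(e, m);   // m becomes a 3x3 CV_32F matrix
    cv::cv2eigen(m, e);   // and back
@endcode
*/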
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
{
CV_DbgAssert(src.rows == _rows && src.cols == _cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else if( src.cols == src.rows )
{
src.convertTo(_dst, _dst.type());
transpose(_dst, _dst);
}
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
// Matx case
template<typename _Tp, int _rows, int _cols, int _options, int _maxRows, int _maxCols> static inline
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
Eigen::Matrix<_Tp, _rows, _cols, _options, _maxRows, _maxCols>& dst )
{
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
Mat(src).copyTo(_dst);
}
}
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
{
dst.resize(src.rows, src.cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else if( src.cols == src.rows )
{
src.convertTo(_dst, _dst.type());
transpose(_dst, _dst);
}
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
// Matx case
template<typename _Tp, int _rows, int _cols> static inline
void cv2eigen( const Matx<_Tp, _rows, _cols>& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, Eigen::Dynamic>& dst )
{
dst.resize(_rows, _cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(_cols, _rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(_rows, _cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
Mat(src).copyTo(_dst);
}
}
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
{
CV_Assert(src.cols == 1);
dst.resize(src.rows);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
// Matx case
template<typename _Tp, int _rows> static inline
void cv2eigen( const Matx<_Tp, _rows, 1>& src,
Eigen::Matrix<_Tp, Eigen::Dynamic, 1>& dst )
{
dst.resize(_rows);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(1, _rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(_rows, 1, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.copyTo(_dst);
}
}
template<typename _Tp> static inline
void cv2eigen( const Mat& src,
Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
{
CV_Assert(src.rows == 1);
dst.resize(src.cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(src.cols, src.rows, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
if( src.type() == _dst.type() )
transpose(src, _dst);
else
Mat(src.t()).convertTo(_dst, _dst.type());
}
else
{
const Mat _dst(src.rows, src.cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
src.convertTo(_dst, _dst.type());
}
}
//Matx
template<typename _Tp, int _cols> static inline
void cv2eigen( const Matx<_Tp, 1, _cols>& src,
Eigen::Matrix<_Tp, 1, Eigen::Dynamic>& dst )
{
dst.resize(_cols);
if( !(dst.Flags & Eigen::RowMajorBit) )
{
const Mat _dst(_cols, 1, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
transpose(src, _dst);
}
else
{
const Mat _dst(1, _cols, traits::Type<_Tp>::value,
dst.data(), (size_t)(dst.outerStride()*sizeof(_Tp)));
Mat(src).copyTo(_dst);
}
}
//! @}
} // cv
#endif

View File

@ -0,0 +1,271 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_FAST_MATH_HPP
#define OPENCV_CORE_FAST_MATH_HPP
#include "opencv2/core/cvdef.h"
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__)) && !defined(__CUDACC__)
#include <emmintrin.h>
#endif
//! @addtogroup core_utils
//! @{
/****************************************************************************************\
* fast math *
\****************************************************************************************/
#ifdef __cplusplus
# include <cmath>
#else
# ifdef __BORLANDC__
# include <fastmath.h>
# else
# include <math.h>
# endif
#endif
#ifdef HAVE_TEGRA_OPTIMIZATION
# include "tegra_round.hpp"
#endif
#if defined __GNUC__ && defined __arm__ && (defined __ARM_PCS_VFP || defined __ARM_VFPV3__ || defined __ARM_NEON__) && !defined __SOFTFP__ && !defined(__CUDACC__)
// 1. general scheme
#define ARM_ROUND(_value, _asm_string) \
int res; \
float temp; \
(void)temp; \
__asm__(_asm_string : [res] "=r" (res), [temp] "=w" (temp) : [value] "w" (_value)); \
return res
// 2. version for double
#ifdef __clang__
#define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %[value] \n vmov %[res], %[temp]")
#else
#define ARM_ROUND_DBL(value) ARM_ROUND(value, "vcvtr.s32.f64 %[temp], %P[value] \n vmov %[res], %[temp]")
#endif
// 3. version for float
#define ARM_ROUND_FLT(value) ARM_ROUND(value, "vcvtr.s32.f32 %[temp], %[value]\n vmov %[res], %[temp]")
#endif
/** @brief Rounds floating-point number to the nearest integer
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
result is not defined.
*/
CV_INLINE int
cvRound( double value )
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128d t = _mm_set_sd( value );
return _mm_cvtsd_si32(t);
#elif defined _MSC_VER && defined _M_IX86
int t;
__asm
{
fld value;
fistp t;
}
return t;
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_DBL(value);
#elif defined CV_ICC || defined __GNUC__
# if defined ARM_ROUND_DBL
ARM_ROUND_DBL(value);
# else
return (int)lrint(value);
# endif
#else
/* it's ok if round does not comply with IEEE754 standard;
the tests should allow +/-1 difference when the tested functions use round */
return (int)(value + (value >= 0 ? 0.5 : -0.5));
#endif
}
/** @brief Rounds floating-point number to the nearest integer not larger than the original.
The function computes an integer i such that:
\f[i \le \texttt{value} < i+1\f]
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
result is not defined.
*/
CV_INLINE int cvFloor( double value )
{
int i = (int)value;
return i - (i > value);
}
/** @brief Rounds floating-point number to the nearest integer not smaller than the original.
The function computes an integer i such that:
\f[i-1 < \texttt{value} \le i\f]
@param value floating-point number. If the value is outside of INT_MIN ... INT_MAX range, the
result is not defined.
*/
CV_INLINE int cvCeil( double value )
{
int i = (int)value;
return i + (i < value);
}
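/* A minimal usage sketch (illustrative): under SSE2 with the default rounding
   mode, cvRound rounds halfway cases to the nearest even integer:
@code{.cpp}
    cvRound(2.5);   // -> 2
    cvRound(3.5);   // -> 4
    cvFloor(-1.2);  // -> -2
    cvCeil(-1.2);   // -> -1
@endcode
*/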
/** @brief Determines if the argument is Not A Number.
@param value The input floating-point value
The function returns 1 if the argument is Not A Number (as defined by IEEE754 standard), 0
otherwise. */
CV_INLINE int cvIsNaN( double value )
{
Cv64suf ieee754;
ieee754.f = value;
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) +
((unsigned)ieee754.u != 0) > 0x7ff00000;
}
/** @brief Determines if the argument is Infinity.
@param value The input floating-point value
The function returns 1 if the argument is a plus or minus infinity (as defined by IEEE754 standard)
and 0 otherwise. */
CV_INLINE int cvIsInf( double value )
{
Cv64suf ieee754;
ieee754.f = value;
return ((unsigned)(ieee754.u >> 32) & 0x7fffffff) == 0x7ff00000 &&
(unsigned)ieee754.u == 0;
}
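/* A minimal usage sketch (illustrative): both checks inspect the raw IEEE754
   bit pattern, so they keep working even when fast-math compiler flags make
   std::isnan/std::isinf unreliable:
@code{.cpp}
    #include <limits>
    double nan = std::numeric_limits<double>::quiet_NaN();
    double inf = std::numeric_limits<double>::infinity();
    cvIsNaN(nan);  // -> 1
    cvIsInf(inf);  // -> 1
    cvIsNaN(1.0);  // -> 0
@endcode
*/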
#ifdef __cplusplus
/** @overload */
CV_INLINE int cvRound(float value)
{
#if ((defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__ \
&& defined __SSE2__ && !defined __APPLE__) || CV_SSE2) && !defined(__CUDACC__)
__m128 t = _mm_set_ss( value );
return _mm_cvtss_si32(t);
#elif defined _MSC_VER && defined _M_IX86
int t;
__asm
{
fld value;
fistp t;
}
return t;
#elif ((defined _MSC_VER && defined _M_ARM) || defined CV_ICC || \
defined __GNUC__) && defined HAVE_TEGRA_OPTIMIZATION
TEGRA_ROUND_FLT(value);
#elif defined CV_ICC || defined __GNUC__
# if defined ARM_ROUND_FLT
ARM_ROUND_FLT(value);
# else
return (int)lrintf(value);
# endif
#else
/* it's ok if round does not comply with IEEE754 standard;
the tests should allow +/-1 difference when the tested functions use round */
return (int)(value + (value >= 0 ? 0.5f : -0.5f));
#endif
}
/** @overload */
CV_INLINE int cvRound( int value )
{
return value;
}
/** @overload */
CV_INLINE int cvFloor( float value )
{
int i = (int)value;
return i - (i > value);
}
/** @overload */
CV_INLINE int cvFloor( int value )
{
return value;
}
/** @overload */
CV_INLINE int cvCeil( float value )
{
int i = (int)value;
return i + (i < value);
}
/** @overload */
CV_INLINE int cvCeil( int value )
{
return value;
}
/** @overload */
CV_INLINE int cvIsNaN( float value )
{
Cv32suf ieee754;
ieee754.f = value;
return (ieee754.u & 0x7fffffff) > 0x7f800000;
}
/** @overload */
CV_INLINE int cvIsInf( float value )
{
Cv32suf ieee754;
ieee754.f = value;
return (ieee754.u & 0x7fffffff) == 0x7f800000;
}
#endif // __cplusplus
//! @} core_utils
#endif

View File

@ -0,0 +1,250 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_HAL_HPP
#define OPENCV_HAL_HPP
#include "opencv2/core/cvdef.h"
#include "opencv2/core/cvstd.hpp"
#include "opencv2/core/hal/interface.h"
namespace cv { namespace hal {
//! @addtogroup core_hal_functions
//! @{
CV_EXPORTS int normHamming(const uchar* a, int n);
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n);
CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize);
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors);
CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors);
CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
int m_a, int n_a, int n_d, int flags);
CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
CV_EXPORTS float normL1_(const float* a, const float* b, int n);
CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n);
CV_EXPORTS void exp32f(const float* src, float* dst, int n);
CV_EXPORTS void exp64f(const double* src, double* dst, int n);
CV_EXPORTS void log32f(const float* src, float* dst, int n);
CV_EXPORTS void log64f(const double* src, double* dst, int n);
CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
CV_EXPORTS void sqrt64f(const double* src, double* dst, int len);
CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len);
CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len);
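/* A minimal usage sketch (illustrative): the HAL entry points operate on raw
   contiguous arrays; the high-level cv::sqrt, cv::exp, ... dispatch to them:
@code{.cpp}
    float src[4] = { 1.f, 4.f, 9.f, 16.f };
    float dst[4];
    cv::hal::sqrt32f(src, dst, 4);  // dst = { 1, 2, 3, 4 }
@endcode
*/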
CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn );
CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn );
CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn );
CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn );
CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn );
CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn );
CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn );
CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn );
CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
struct CV_EXPORTS DFT1D
{
static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
virtual void apply(const uchar *src, uchar *dst) = 0;
virtual ~DFT1D() {}
};
struct CV_EXPORTS DFT2D
{
static Ptr<DFT2D> create(int width, int height, int depth,
int src_channels, int dst_channels,
int flags, int nonzero_rows = 0);
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
virtual ~DFT2D() {}
};
struct CV_EXPORTS DCT2D
{
static Ptr<DCT2D> create(int width, int height, int depth, int flags);
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
virtual ~DCT2D() {}
};
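/* A minimal usage sketch (illustrative; width, height, the buffers and their
   steps are assumptions, and flags = 0 is assumed to request a plain forward
   transform): the context objects are created once and then applied to raw
   row-aligned data:
@code{.cpp}
    cv::Ptr<cv::hal::DFT2D> dft =
        cv::hal::DFT2D::create(width, height, CV_32F, 2, 2, 0, 0);
    dft->apply(srcData, srcStep, dstData, dstStep);  // packed complex CV_32F
@endcode
*/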
//! @} core_hal
//=============================================================================
// for binary compatibility with 3.0
//! @cond IGNORED
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
CV_EXPORTS void exp(const float* src, float* dst, int n);
CV_EXPORTS void exp(const double* src, double* dst, int n);
CV_EXPORTS void log(const float* src, float* dst, int n);
CV_EXPORTS void log(const double* src, double* dst, int n);
CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
CV_EXPORTS void sqrt(const float* src, float* dst, int len);
CV_EXPORTS void sqrt(const double* src, double* dst, int len);
CV_EXPORTS void invSqrt(const float* src, float* dst, int len);
CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
//! @endcond
}} //cv::hal
#endif //OPENCV_HAL_HPP

View File

@ -0,0 +1,182 @@
#ifndef OPENCV_CORE_HAL_INTERFACE_H
#define OPENCV_CORE_HAL_INTERFACE_H
//! @addtogroup core_hal_interface
//! @{
//! @name Return codes
//! @{
#define CV_HAL_ERROR_OK 0
#define CV_HAL_ERROR_NOT_IMPLEMENTED 1
#define CV_HAL_ERROR_UNKNOWN -1
//! @}
#ifdef __cplusplus
#include <cstddef>
#else
#include <stddef.h>
#include <stdbool.h>
#endif
//! @name Data types
//! primitive types
//! - schar - signed 1 byte integer
//! - uchar - unsigned 1 byte integer
//! - short - signed 2 byte integer
//! - ushort - unsigned 2 byte integer
//! - int - signed 4 byte integer
//! - uint - unsigned 4 byte integer
//! - int64 - signed 8 byte integer
//! - uint64 - unsigned 8 byte integer
//! @{
#if !defined _MSC_VER && !defined __BORLANDC__
# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
# include <cstdint>
# ifdef __NEWLIB__
typedef unsigned int uint;
# else
typedef std::uint32_t uint;
# endif
# else
# include <stdint.h>
typedef uint32_t uint;
# endif
#else
typedef unsigned uint;
#endif
typedef signed char schar;
#ifndef __IPL_H__
typedef unsigned char uchar;
typedef unsigned short ushort;
#endif
#if defined _MSC_VER || defined __BORLANDC__
typedef __int64 int64;
typedef unsigned __int64 uint64;
# define CV_BIG_INT(n) n##I64
# define CV_BIG_UINT(n) n##UI64
#else
typedef int64_t int64;
typedef uint64_t uint64;
# define CV_BIG_INT(n) n##LL
# define CV_BIG_UINT(n) n##ULL
#endif
#define CV_CN_MAX 512
#define CV_CN_SHIFT 3
#define CV_DEPTH_MAX (1 << CV_CN_SHIFT)
#define CV_8U 0
#define CV_8S 1
#define CV_16U 2
#define CV_16S 3
#define CV_32S 4
#define CV_32F 5
#define CV_64F 6
#define CV_USRTYPE1 7
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
#define CV_MAKE_TYPE CV_MAKETYPE
#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
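/* A minimal worked example (illustrative): a type id packs the depth in the
   low CV_CN_SHIFT bits and (channels - 1) above them:
@code{.cpp}
    CV_MAKETYPE(CV_8U, 3);   // == CV_8UC3 == 0 + (2 << 3) == 16
    CV_MAT_DEPTH(CV_32FC4);  // == CV_32F  == 5
@endcode
*/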
//! @}
//! @name Comparison operation
//! @sa cv::CmpTypes
//! @{
#define CV_HAL_CMP_EQ 0
#define CV_HAL_CMP_GT 1
#define CV_HAL_CMP_GE 2
#define CV_HAL_CMP_LT 3
#define CV_HAL_CMP_LE 4
#define CV_HAL_CMP_NE 5
//! @}
//! @name Border processing modes
//! @sa cv::BorderTypes
//! @{
#define CV_HAL_BORDER_CONSTANT 0
#define CV_HAL_BORDER_REPLICATE 1
#define CV_HAL_BORDER_REFLECT 2
#define CV_HAL_BORDER_WRAP 3
#define CV_HAL_BORDER_REFLECT_101 4
#define CV_HAL_BORDER_TRANSPARENT 5
#define CV_HAL_BORDER_ISOLATED 16
//! @}
//! @name DFT flags
//! @{
#define CV_HAL_DFT_INVERSE 1
#define CV_HAL_DFT_SCALE 2
#define CV_HAL_DFT_ROWS 4
#define CV_HAL_DFT_COMPLEX_OUTPUT 16
#define CV_HAL_DFT_REAL_OUTPUT 32
#define CV_HAL_DFT_TWO_STAGE 64
#define CV_HAL_DFT_STAGE_COLS 128
#define CV_HAL_DFT_IS_CONTINUOUS 512
#define CV_HAL_DFT_IS_INPLACE 1024
//! @}
//! @name SVD flags
//! @{
#define CV_HAL_SVD_NO_UV 1
#define CV_HAL_SVD_SHORT_UV 2
#define CV_HAL_SVD_MODIFY_A 4
#define CV_HAL_SVD_FULL_UV 8
//! @}
//! @name Gemm flags
//! @{
#define CV_HAL_GEMM_1_T 1
#define CV_HAL_GEMM_2_T 2
#define CV_HAL_GEMM_3_T 4
//! @}
//! @}
#endif

View File

@ -0,0 +1,472 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_HAL_INTRIN_HPP
#define OPENCV_HAL_INTRIN_HPP
#include <cmath>
#include <float.h>
#include <stdlib.h>
#include "opencv2/core/cvdef.h"
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
#define OPENCV_HAL_AND(a, b) ((a) & (b))
#define OPENCV_HAL_NOP(a) (a)
#define OPENCV_HAL_1ST(a, b) (a)
// unlike the HAL API, which lives in cv::hal,
// the intrinsics are placed directly in the cv namespace
// to make them easier to access from within OpenCV code
namespace cv {
#ifndef CV_DOXYGEN
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
//! @addtogroup core_hal_intrin
//! @{
//! @cond IGNORED
template<typename _Tp> struct V_TypeTraits
{
typedef _Tp int_type;
typedef _Tp uint_type;
typedef _Tp abs_type;
typedef _Tp sum_type;
enum { delta = 0, shift = 0 };
static int_type reinterpret_int(_Tp x) { return x; }
static uint_type reinterpret_uint(_Tp x) { return x; }
static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; }
};
template<> struct V_TypeTraits<uchar>
{
typedef uchar value_type;
typedef schar int_type;
typedef uchar uint_type;
typedef uchar abs_type;
typedef int sum_type;
typedef ushort w_type;
typedef unsigned q_type;
enum { delta = 128, shift = 8 };
static int_type reinterpret_int(value_type x) { return (int_type)x; }
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
};
template<> struct V_TypeTraits<schar>
{
typedef schar value_type;
typedef schar int_type;
typedef uchar uint_type;
typedef uchar abs_type;
typedef int sum_type;
typedef short w_type;
typedef int q_type;
enum { delta = 128, shift = 8 };
static int_type reinterpret_int(value_type x) { return (int_type)x; }
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
};
template<> struct V_TypeTraits<ushort>
{
typedef ushort value_type;
typedef short int_type;
typedef ushort uint_type;
typedef ushort abs_type;
typedef int sum_type;
typedef unsigned w_type;
typedef uchar nu_type;
enum { delta = 32768, shift = 16 };
static int_type reinterpret_int(value_type x) { return (int_type)x; }
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
};
template<> struct V_TypeTraits<short>
{
typedef short value_type;
typedef short int_type;
typedef ushort uint_type;
typedef ushort abs_type;
typedef int sum_type;
typedef int w_type;
typedef uchar nu_type;
typedef schar n_type;
enum { delta = 128, shift = 8 };
static int_type reinterpret_int(value_type x) { return (int_type)x; }
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
};
template<> struct V_TypeTraits<unsigned>
{
typedef unsigned value_type;
typedef int int_type;
typedef unsigned uint_type;
typedef unsigned abs_type;
typedef unsigned sum_type;
typedef uint64 w_type;
typedef ushort nu_type;
static int_type reinterpret_int(value_type x) { return (int_type)x; }
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
};
template<> struct V_TypeTraits<int>
{
typedef int value_type;
typedef int int_type;
typedef unsigned uint_type;
typedef unsigned abs_type;
typedef int sum_type;
typedef int64 w_type;
typedef short n_type;
typedef ushort nu_type;
static int_type reinterpret_int(value_type x) { return (int_type)x; }
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
};
template<> struct V_TypeTraits<uint64>
{
typedef uint64 value_type;
typedef int64 int_type;
typedef uint64 uint_type;
typedef uint64 abs_type;
typedef uint64 sum_type;
typedef unsigned nu_type;
static int_type reinterpret_int(value_type x) { return (int_type)x; }
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
};
template<> struct V_TypeTraits<int64>
{
typedef int64 value_type;
typedef int64 int_type;
typedef uint64 uint_type;
typedef uint64 abs_type;
typedef int64 sum_type;
typedef int nu_type;
static int_type reinterpret_int(value_type x) { return (int_type)x; }
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
};
template<> struct V_TypeTraits<float>
{
typedef float value_type;
typedef int int_type;
typedef unsigned uint_type;
typedef float abs_type;
typedef float sum_type;
typedef double w_type;
static int_type reinterpret_int(value_type x)
{
Cv32suf u;
u.f = x;
return u.i;
}
static uint_type reinterpret_uint(value_type x)
{
Cv32suf u;
u.f = x;
return u.u;
}
static value_type reinterpret_from_int(int_type x)
{
Cv32suf u;
u.i = x;
return u.f;
}
};
template<> struct V_TypeTraits<double>
{
typedef double value_type;
typedef int64 int_type;
typedef uint64 uint_type;
typedef double abs_type;
typedef double sum_type;
static int_type reinterpret_int(value_type x)
{
Cv64suf u;
u.f = x;
return u.i;
}
static uint_type reinterpret_uint(value_type x)
{
Cv64suf u;
u.f = x;
return u.u;
}
static value_type reinterpret_from_int(int_type x)
{
Cv64suf u;
u.i = x;
return u.f;
}
};
template <typename T> struct V_SIMD128Traits
{
enum { nlanes = 16 / sizeof(T) };
};
//! @endcond
//! @}
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
}
#ifdef CV_DOXYGEN
# undef CV_SSE2
# undef CV_NEON
# undef CV_VSX
#endif
#if CV_SSE2
#include "opencv2/core/hal/intrin_sse.hpp"
#elif CV_NEON
#include "opencv2/core/hal/intrin_neon.hpp"
#elif CV_VSX
#include "opencv2/core/hal/intrin_vsx.hpp"
#else
#include "opencv2/core/hal/intrin_cpp.hpp"
#endif
//! @addtogroup core_hal_intrin
//! @{
#ifndef CV_SIMD128
//! Set to 1 if current compiler supports vector extensions (NEON or SSE is enabled)
#define CV_SIMD128 0
#endif
#ifndef CV_SIMD128_64F
//! Set to 1 if current intrinsics implementation supports 64-bit float vectors
#define CV_SIMD128_64F 0
#endif
//! @}
//==================================================================================================
//! @cond IGNORED
namespace cv {
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
template <typename R> struct V_RegTrait128;
template <> struct V_RegTrait128<uchar> {
typedef v_uint8x16 reg;
typedef v_uint16x8 w_reg;
typedef v_uint32x4 q_reg;
typedef v_uint8x16 u_reg;
static v_uint8x16 zero() { return v_setzero_u8(); }
static v_uint8x16 all(uchar val) { return v_setall_u8(val); }
};
template <> struct V_RegTrait128<schar> {
typedef v_int8x16 reg;
typedef v_int16x8 w_reg;
typedef v_int32x4 q_reg;
typedef v_uint8x16 u_reg;
static v_int8x16 zero() { return v_setzero_s8(); }
static v_int8x16 all(schar val) { return v_setall_s8(val); }
};
template <> struct V_RegTrait128<ushort> {
typedef v_uint16x8 reg;
typedef v_uint32x4 w_reg;
typedef v_int16x8 int_reg;
typedef v_uint16x8 u_reg;
static v_uint16x8 zero() { return v_setzero_u16(); }
static v_uint16x8 all(ushort val) { return v_setall_u16(val); }
};
template <> struct V_RegTrait128<short> {
typedef v_int16x8 reg;
typedef v_int32x4 w_reg;
typedef v_uint16x8 u_reg;
static v_int16x8 zero() { return v_setzero_s16(); }
static v_int16x8 all(short val) { return v_setall_s16(val); }
};
template <> struct V_RegTrait128<unsigned> {
typedef v_uint32x4 reg;
typedef v_uint64x2 w_reg;
typedef v_int32x4 int_reg;
typedef v_uint32x4 u_reg;
static v_uint32x4 zero() { return v_setzero_u32(); }
static v_uint32x4 all(unsigned val) { return v_setall_u32(val); }
};
template <> struct V_RegTrait128<int> {
typedef v_int32x4 reg;
typedef v_int64x2 w_reg;
typedef v_uint32x4 u_reg;
static v_int32x4 zero() { return v_setzero_s32(); }
static v_int32x4 all(int val) { return v_setall_s32(val); }
};
template <> struct V_RegTrait128<uint64> {
typedef v_uint64x2 reg;
static v_uint64x2 zero() { return v_setzero_u64(); }
static v_uint64x2 all(uint64 val) { return v_setall_u64(val); }
};
template <> struct V_RegTrait128<int64> {
typedef v_int64x2 reg;
static v_int64x2 zero() { return v_setzero_s64(); }
static v_int64x2 all(int64 val) { return v_setall_s64(val); }
};
template <> struct V_RegTrait128<float> {
typedef v_float32x4 reg;
typedef v_int32x4 int_reg;
typedef v_float32x4 u_reg;
static v_float32x4 zero() { return v_setzero_f32(); }
static v_float32x4 all(float val) { return v_setall_f32(val); }
};
#if CV_SIMD128_64F
template <> struct V_RegTrait128<double> {
typedef v_float64x2 reg;
typedef v_int32x4 int_reg;
typedef v_float64x2 u_reg;
static v_float64x2 zero() { return v_setzero_f64(); }
static v_float64x2 all(double val) { return v_setall_f64(val); }
};
#endif
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM)
unsigned long index = 0;
_BitScanForward(&index, value);
return (unsigned int)index;
#else
return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__)
return __builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
return _bit_scan_forward(value);
#elif defined(__clang__)
// clang defines __GNUC__, so this branch is normally unreachable; llvm.cttz.i32
// is an LLVM intrinsic name, not valid C++, so use the GCC-compatible builtin.
return __builtin_ctz(value);
#else
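// Portable fallback: (value & -value) isolates the lowest set bit; multiplying
// by the De Bruijn constant 0x077CB531 and taking the top 5 bits yields a
// unique index into the position table below.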
static const int MultiplyDeBruijnBitPosition[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
return MultiplyDeBruijnBitPosition[((uint32_t)((value & -value) * 0x077CB531U)) >> 27];
#endif
}
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
} // cv::
//! @endcond
#endif
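
The V_TypeTraits machinery above lets generic SIMD code move lane values between their floating-point and integer views. A minimal sketch of how user code exercises it together with trailingZeros32, assuming an OpenCV 3.x tree where this header is reachable as opencv2/core/hal/intrin.hpp:

#include <cstdio>
#include "opencv2/core/hal/intrin.hpp"

int main()
{
    // 1.0f has the IEEE-754 bit pattern 0x3F800000.
    int bits = cv::V_TypeTraits<float>::reinterpret_int(1.0f);
    std::printf("bits = 0x%08X\n", (unsigned)bits);
    // Bit 23 (the mantissa width) is the lowest set bit, so 23 trailing zeros.
    std::printf("tz   = %u\n", cv::trailingZeros32((unsigned)bits));
    return 0;
}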

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,962 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_HAL_VSX_HPP
#define OPENCV_HAL_VSX_HPP
#include <algorithm>
#include "opencv2/core/utility.hpp"
#define CV_SIMD128 1
#define CV_SIMD128_64F 1
/**
* TODO: support half precision for POWER9
* convert instructions xvcvhpsp, xvcvsphp
**/
namespace cv
{
//! @cond IGNORED
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
///////// Types ////////////
struct v_uint8x16
{
typedef uchar lane_type;
enum { nlanes = 16 };
vec_uchar16 val;
explicit v_uint8x16(const vec_uchar16& v) : val(v)
{}
v_uint8x16() : val(vec_uchar16_z)
{}
v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v))
{}
v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
: val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
{}
uchar get0() const
{ return vec_extract(val, 0); }
};
struct v_int8x16
{
typedef schar lane_type;
enum { nlanes = 16 };
vec_char16 val;
explicit v_int8x16(const vec_char16& v) : val(v)
{}
v_int8x16() : val(vec_char16_z)
{}
v_int8x16(vec_bchar16 v) : val(vec_char16_c(v))
{}
v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
: val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
{}
schar get0() const
{ return vec_extract(val, 0); }
};
struct v_uint16x8
{
typedef ushort lane_type;
enum { nlanes = 8 };
vec_ushort8 val;
explicit v_uint16x8(const vec_ushort8& v) : val(v)
{}
v_uint16x8() : val(vec_ushort8_z)
{}
v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v))
{}
v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
: val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7))
{}
ushort get0() const
{ return vec_extract(val, 0); }
};
struct v_int16x8
{
typedef short lane_type;
enum { nlanes = 8 };
vec_short8 val;
explicit v_int16x8(const vec_short8& v) : val(v)
{}
v_int16x8() : val(vec_short8_z)
{}
v_int16x8(vec_bshort8 v) : val(vec_short8_c(v))
{}
v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
: val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7))
{}
short get0() const
{ return vec_extract(val, 0); }
};
struct v_uint32x4
{
typedef unsigned lane_type;
enum { nlanes = 4 };
vec_uint4 val;
explicit v_uint32x4(const vec_uint4& v) : val(v)
{}
v_uint32x4() : val(vec_uint4_z)
{}
v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v))
{}
v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3))
{}
uint get0() const
{ return vec_extract(val, 0); }
};
struct v_int32x4
{
typedef int lane_type;
enum { nlanes = 4 };
vec_int4 val;
explicit v_int32x4(const vec_int4& v) : val(v)
{}
v_int32x4() : val(vec_int4_z)
{}
v_int32x4(vec_bint4 v) : val(vec_int4_c(v))
{}
v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3))
{}
int get0() const
{ return vec_extract(val, 0); }
};
struct v_float32x4
{
typedef float lane_type;
enum { nlanes = 4 };
vec_float4 val;
explicit v_float32x4(const vec_float4& v) : val(v)
{}
v_float32x4() : val(vec_float4_z)
{}
v_float32x4(vec_bint4 v) : val(vec_float4_c(v))
{}
v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3))
{}
float get0() const
{ return vec_extract(val, 0); }
};
struct v_uint64x2
{
typedef uint64 lane_type;
enum { nlanes = 2 };
vec_udword2 val;
explicit v_uint64x2(const vec_udword2& v) : val(v)
{}
v_uint64x2() : val(vec_udword2_z)
{}
v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v))
{}
v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1))
{}
uint64 get0() const
{ return vec_extract(val, 0); }
};
struct v_int64x2
{
typedef int64 lane_type;
enum { nlanes = 2 };
vec_dword2 val;
explicit v_int64x2(const vec_dword2& v) : val(v)
{}
v_int64x2() : val(vec_dword2_z)
{}
v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v))
{}
v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1))
{}
int64 get0() const
{ return vec_extract(val, 0); }
};
struct v_float64x2
{
typedef double lane_type;
enum { nlanes = 2 };
vec_double2 val;
explicit v_float64x2(const vec_double2& v) : val(v)
{}
v_float64x2() : val(vec_double2_z)
{}
v_float64x2(vec_bdword2 v) : val(vec_double2_c(v))
{}
v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1))
{}
double get0() const
{ return vec_extract(val, 0); }
};
//////////////// Load and store operations ///////////////
/*
 * clang-5 aborts while parsing "vec_xxx_c" only when it appears
 * inside a function template that is defined by a preprocessor macro.
 *
 * If vec_xxx_c is defined as a C++ cast, clang-5 accepts it.
 */
#define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast) \
inline _Tpvec v_setzero_##suffix() { return _Tpvec(); } \
inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));} \
template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \
{ return _Tpvec((cast)a.val); }
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16)
OPENCV_HAL_IMPL_VSX_INITVEC(v_int8x16, schar, s8, vec_char16)
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint16x8, ushort, u16, vec_ushort8)
OPENCV_HAL_IMPL_VSX_INITVEC(v_int16x8, short, s16, vec_short8)
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint32x4, uint, u32, vec_uint4)
OPENCV_HAL_IMPL_VSX_INITVEC(v_int32x4, int, s32, vec_int4)
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint64x2, uint64, u64, vec_udword2)
OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \
inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(ld_func(0, ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(ld_func(0, ptr)); } \
inline _Tpvec v_load_low(const _Tp* ptr) \
{ return _Tpvec(vec_ld_l8(ptr)); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ st_func(a.val, 0, ptr); } \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ st_func(a.val, 0, ptr); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ vec_st_l8(a.val, ptr); } \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
{ vec_st_h8(a.val, ptr); }
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2)
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2)
//////////////// Value reordering ///////////////
/* de&interleave */
#define OPENCV_HAL_IMPL_VSX_INTERLEAVE(_Tp, _Tpvec) \
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b) \
{ vec_ld_deinterleave(ptr, a.val, b.val);} \
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, \
_Tpvec& b, _Tpvec& c) \
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val); } \
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \
_Tpvec& c, _Tpvec& d) \
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b) \
{ vec_st_interleave(a.val, b.val, ptr); } \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \
const _Tpvec& b, const _Tpvec& c) \
{ vec_st_interleave(a.val, b.val, c.val, ptr); } \
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \
const _Tpvec& c, const _Tpvec& d) \
{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); }
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(schar, v_int8x16)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(ushort, v_uint16x8)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(short, v_int16x8)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4)
OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2)
/* Expand */
#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh) \
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
{ \
b0.val = fh(a.val); \
b1.val = fl(a.val); \
} \
inline _Tpwvec v_load_expand(const _Tp* ptr) \
{ return _Tpwvec(fh(vsx_ld(0, ptr))); }
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint16x8, v_uint32x4, ushort, vec_unpacklu, vec_unpackhu)
OPENCV_HAL_IMPL_VSX_EXPAND(v_int16x8, v_int32x4, short, vec_unpackl, vec_unpackh)
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu)
OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
inline v_uint32x4 v_load_expand_q(const uchar* ptr)
{ return v_uint32x4(vec_ld_buw(ptr)); }
inline v_int32x4 v_load_expand_q(const schar* ptr)
{ return v_int32x4(vec_ld_bsw(ptr)); }
/* pack */
#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \
inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \
return _Tpvec(pkfnc(a.val, b.val)); \
} \
inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \
vec_st_l8(pkfnc(a.val, a.val), ptr); \
} \
template<int n> \
inline _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \
const __vector _Tpvn vn = vec_splats((_Tpvn)n); \
const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \
return _Tpvec(pkfnc(sfnc(addfnc(a.val, delta), vn), sfnc(addfnc(b.val, delta), vn))); \
} \
template<int n> \
inline void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \
const __vector _Tpvn vn = vec_splats((_Tpvn)n); \
const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \
vec_st_l8(pkfnc(sfnc(addfnc(a.val, delta), vn), delta), ptr); \
}
OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_uint16x8, unsigned short, unsigned short,
vec_sr, vec_packs, vec_adds, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_int8x16, schar, v_int16x8, unsigned short, short,
vec_sra, vec_packs, vec_adds, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_uint32x4, unsigned int, unsigned int,
vec_sr, vec_packs, vec_add, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int,
vec_sra, vec_packs, vec_add, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long,
vec_sr, vec_pack, vec_add, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long,
vec_sra, vec_pack, vec_add, pack)
OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short,
vec_sra, vec_packsu, vec_adds, pack_u)
OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
vec_sra, vec_packsu, vec_add, pack_u)
// Following variant is not implemented on other platforms:
//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
// vec_sra, vec_packsu, vec_add, pack_u)
/* Recombine */
template <typename _Tpvec>
inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
{
b0.val = vec_mergeh(a0.val, a1.val);
b1.val = vec_mergel(a0.val, a1.val);
}
template <typename _Tpvec>
inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b)
{ return _Tpvec(vec_mergesql(a.val, b.val)); }
template <typename _Tpvec>
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)
{ return _Tpvec(vec_mergesqh(a.val, b.val)); }
template <typename _Tpvec>
inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
{
c.val = vec_mergesqh(a.val, b.val);
d.val = vec_mergesql(a.val, b.val);
}
/* Extract */
template<int s, typename _Tpvec>
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
{
const int w = sizeof(typename _Tpvec::lane_type);
const int n = _Tpvec::nlanes;
const unsigned int sf = ((w * n) - (s * w));
if (s == 0)
return _Tpvec(a.val);
else if (sf > 15)
return _Tpvec();
// mask the shift count (sf & 15) just to keep xlc happy
return _Tpvec(vec_sld(b.val, a.val, sf & 15));
}
#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec) \
template<int s> \
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
{ \
switch(s) { \
case 0: return _Tpvec(a.val); \
case 2: return _Tpvec(b.val); \
case 1: return _Tpvec(vec_sldw(b.val, a.val, 2)); \
default: return _Tpvec(); \
} \
}
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2)
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2)
////////// Arithmetic, bitwise and comparison operations /////////
/* Element-wise binary and unary operations */
/** Arithmetics **/
#define OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin) \
inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(intrin(a.val, b.val)); } \
inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
{ a.val = intrin(a.val, b.val); return a; }
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint8x16, vec_adds)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint8x16, vec_subs)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16, vec_adds)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs)
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint16x8, vec_mul)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs)
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int16x8, vec_mul)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub)
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int32x4, vec_sub)
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int32x4, vec_mul)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float32x4, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float32x4, vec_sub)
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float32x4, vec_mul)
OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float32x4, vec_div)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float64x2, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float64x2, vec_sub)
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float64x2, vec_mul)
OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float64x2, vec_div)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint64x2, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub)
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub)
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, v_int32x4& c, v_int32x4& d)
{
c.val = vec_mul(vec_unpackh(a.val), vec_unpackh(b.val));
d.val = vec_mul(vec_unpackl(a.val), vec_unpackl(b.val));
}
inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, v_uint32x4& c, v_uint32x4& d)
{
c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
}
inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c, v_uint64x2& d)
{
c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
}
/** Non-saturating arithmetics **/
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(func, intrin) \
template<typename _Tpvec> \
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(intrin(a.val, b.val)); }
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub)
/** Bitwise shifts **/
#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc) \
inline _Tpvec operator << (const _Tpvec& a, int imm) \
{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \
inline _Tpvec operator >> (const _Tpvec& a, int imm) \
{ return _Tpvec(shr(a.val, splfunc(imm))); } \
template<int imm> inline _Tpvec v_shl(const _Tpvec& a) \
{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \
template<int imm> inline _Tpvec v_shr(const _Tpvec& a) \
{ return _Tpvec(shr(a.val, splfunc(imm))); }
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint8x16, vec_sr, vec_uchar16_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint16x8, vec_sr, vec_ushort8_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint32x4, vec_sr, vec_uint4_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint64x2, vec_sr, vec_udword2_sp)
// algebraic right shift
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int8x16, vec_sra, vec_uchar16_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int16x8, vec_sra, vec_ushort8_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int32x4, vec_sra, vec_uint4_sp)
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int64x2, vec_sra, vec_udword2_sp)
/** Bitwise logic **/
#define OPENCV_HAL_IMPL_VSX_LOGIC_OP(_Tpvec) \
OPENCV_HAL_IMPL_VSX_BIN_OP(&, _Tpvec, vec_and) \
OPENCV_HAL_IMPL_VSX_BIN_OP(|, _Tpvec, vec_or) \
OPENCV_HAL_IMPL_VSX_BIN_OP(^, _Tpvec, vec_xor) \
inline _Tpvec operator ~ (const _Tpvec& a) \
{ return _Tpvec(vec_not(a.val)); }
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint8x16)
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int8x16)
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint16x8)
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int16x8)
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint32x4)
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int32x4)
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint64x2)
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int64x2)
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float32x4)
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float64x2)
/** Bitwise select **/
#define OPENCV_HAL_IMPL_VSX_SELECT(_Tpvec, cast) \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_sel(b.val, a.val, cast(mask.val))); }
OPENCV_HAL_IMPL_VSX_SELECT(v_uint8x16, vec_bchar16_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_int8x16, vec_bchar16_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_uint16x8, vec_bshort8_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_int16x8, vec_bshort8_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_uint32x4, vec_bint4_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_int32x4, vec_bint4_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_float32x4, vec_bint4_c)
OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c)
/** Comparison **/
#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_cmpeq(a.val, b.val)); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_cmpne(a.val, b.val)); } \
inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_cmplt(a.val, b.val)); } \
inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_cmpgt(a.val, b.val)); } \
inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_cmple(a.val, b.val)); } \
inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_cmpge(a.val, b.val)); }
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint8x16)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int8x16)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint16x8)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int16x8)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint32x4)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int32x4)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float32x4)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2)
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2)
/** min/max **/
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min)
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max)
/** Rotate **/
#define OPENCV_IMPL_VSX_ROTATE(_Tpvec, suffix, shf, cast) \
template<int imm> \
inline _Tpvec v_rotate_##suffix(const _Tpvec& a) \
{ \
const int wd = imm * sizeof(typename _Tpvec::lane_type); \
if (wd > 15) \
return _Tpvec(); \
return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3))); \
}
#define OPENCV_IMPL_VSX_ROTATE_LR(_Tpvec, cast) \
OPENCV_IMPL_VSX_ROTATE(_Tpvec, left, vec_slo, cast) \
OPENCV_IMPL_VSX_ROTATE(_Tpvec, right, vec_sro, cast)
OPENCV_IMPL_VSX_ROTATE_LR(v_uint8x16, vec_uchar16)
OPENCV_IMPL_VSX_ROTATE_LR(v_int8x16, vec_char16)
OPENCV_IMPL_VSX_ROTATE_LR(v_uint16x8, vec_ushort8)
OPENCV_IMPL_VSX_ROTATE_LR(v_int16x8, vec_short8)
OPENCV_IMPL_VSX_ROTATE_LR(v_uint32x4, vec_uint4)
OPENCV_IMPL_VSX_ROTATE_LR(v_int32x4, vec_int4)
OPENCV_IMPL_VSX_ROTATE_LR(v_uint64x2, vec_udword2)
OPENCV_IMPL_VSX_ROTATE_LR(v_int64x2, vec_dword2)
template<int imm, typename _Tpvec>
inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b)
{
enum { CV_SHIFT = 16 - imm * (sizeof(typename _Tpvec::lane_type)) };
if (CV_SHIFT == 16)
return a;
#ifdef __IBMCPP__
return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT & 15));
#else
return _Tpvec(vec_sld(b.val, a.val, CV_SHIFT));
#endif
}
template<int imm, typename _Tpvec>
inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)
{
enum { CV_SHIFT = imm * (sizeof(typename _Tpvec::lane_type)) };
if (CV_SHIFT == 16)
return b;
return _Tpvec(vec_sld(a.val, b.val, CV_SHIFT));
}
#define OPENCV_IMPL_VSX_ROTATE_64(_Tpvec, suffix, rg1, rg2) \
template<int imm> \
inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
{ \
if (imm == 1) \
return _Tpvec(vec_permi(rg1.val, rg2.val, 2)); \
return imm ? b : a; \
}
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, right, a, b)
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, left, b, a)
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)
////////// Reduce and mask /////////
/** Reduce **/
inline short v_reduce_sum(const v_int16x8& a)
{
const vec_int4 zero = vec_int4_z;
return saturate_cast<short>(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3));
}
inline ushort v_reduce_sum(const v_uint16x8& a)
{
const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8))));
return saturate_cast<ushort>(vec_extract(vec_sums(v4, vec_int4_z), 3));
}
#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \
inline scalartype v_reduce_##suffix(const _Tpvec& a) \
{ \
const _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \
return vec_extract(func(rs, vec_sld(rs, rs, 4)), 0); \
}
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, sum, vec_add)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, min, vec_min)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, sum, vec_add)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, min, vec_min)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \
inline scalartype v_reduce_##suffix(const _Tpvec& a) \
{ \
_Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \
rs = func(rs, vec_sld(rs, rs, 4)); \
return vec_extract(func(rs, vec_sld(rs, rs, 2)), 0); \
}
OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max)
OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min)
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
const v_float32x4& c, const v_float32x4& d)
{
vec_float4 ac = vec_add(vec_mergel(a.val, c.val), vec_mergeh(a.val, c.val));
ac = vec_add(ac, vec_sld(ac, ac, 8));
vec_float4 bd = vec_add(vec_mergel(b.val, d.val), vec_mergeh(b.val, d.val));
bd = vec_add(bd, vec_sld(bd, bd, 8));
return v_float32x4(vec_mergeh(ac, bd));
}
/** Popcount **/
template<typename _Tpvec>
inline v_uint32x4 v_popcount(const _Tpvec& a)
{ return v_uint32x4(vec_popcntu(vec_uint4_c(a.val))); }
/** Mask **/
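// v_signmask packs the most significant (sign) bit of every lane into the low
// bits of a scalar int, with lane 0 in bit 0.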
inline int v_signmask(const v_uint8x16& a)
{
vec_uchar16 sv = vec_sr(a.val, vec_uchar16_sp(7));
static const vec_uchar16 slm = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
sv = vec_sl(sv, slm);
vec_uint4 sv4 = vec_sum4s(sv, vec_uint4_z);
static const vec_uint4 slm4 = {0, 0, 8, 8};
sv4 = vec_sl(sv4, slm4);
return vec_extract(vec_sums((vec_int4) sv4, vec_int4_z), 3);
}
inline int v_signmask(const v_int8x16& a)
{ return v_signmask(v_reinterpret_as_u8(a)); }
inline int v_signmask(const v_int16x8& a)
{
static const vec_ushort8 slm = {0, 1, 2, 3, 4, 5, 6, 7};
vec_short8 sv = vec_sr(a.val, vec_ushort8_sp(15));
sv = vec_sl(sv, slm);
vec_int4 svi = vec_int4_z;
svi = vec_sums(vec_sum4s(sv, svi), svi);
return vec_extract(svi, 3);
}
inline int v_signmask(const v_uint16x8& a)
{ return v_signmask(v_reinterpret_as_s16(a)); }
inline int v_signmask(const v_int32x4& a)
{
static const vec_uint4 slm = {0, 1, 2, 3};
vec_int4 sv = vec_sr(a.val, vec_uint4_sp(31));
sv = vec_sl(sv, slm);
sv = vec_sums(sv, vec_int4_z);
return vec_extract(sv, 3);
}
inline int v_signmask(const v_uint32x4& a)
{ return v_signmask(v_reinterpret_as_s32(a)); }
inline int v_signmask(const v_float32x4& a)
{ return v_signmask(v_reinterpret_as_s32(a)); }
inline int v_signmask(const v_int64x2& a)
{
VSX_UNUSED(const vec_dword2) sv = vec_sr(a.val, vec_udword2_sp(63));
return (int)vec_extract(sv, 0) | (int)vec_extract(sv, 1) << 1;
}
inline int v_signmask(const v_uint64x2& a)
{ return v_signmask(v_reinterpret_as_s64(a)); }
inline int v_signmask(const v_float64x2& a)
{ return v_signmask(v_reinterpret_as_s64(a)); }
template<typename _Tpvec>
inline bool v_check_all(const _Tpvec& a)
{ return vec_all_lt(a.val, _Tpvec().val);}
inline bool v_check_all(const v_uint8x16 &a)
{ return v_check_all(v_reinterpret_as_s8(a)); }
inline bool v_check_all(const v_uint16x8 &a)
{ return v_check_all(v_reinterpret_as_s16(a)); }
inline bool v_check_all(const v_uint32x4 &a)
{ return v_check_all(v_reinterpret_as_s32(a)); }
template<typename _Tpvec>
inline bool v_check_any(const _Tpvec& a)
{ return vec_any_lt(a.val, _Tpvec().val);}
inline bool v_check_any(const v_uint8x16 &a)
{ return v_check_any(v_reinterpret_as_s8(a)); }
inline bool v_check_any(const v_uint16x8 &a)
{ return v_check_any(v_reinterpret_as_s16(a)); }
inline bool v_check_any(const v_uint32x4 &a)
{ return v_check_any(v_reinterpret_as_s32(a)); }
////////// Other math /////////
/** Some frequent operations **/
inline v_float32x4 v_sqrt(const v_float32x4& x)
{ return v_float32x4(vec_sqrt(x.val)); }
inline v_float64x2 v_sqrt(const v_float64x2& x)
{ return v_float64x2(vec_sqrt(x.val)); }
inline v_float32x4 v_invsqrt(const v_float32x4& x)
{ return v_float32x4(vec_rsqrt(x.val)); }
inline v_float64x2 v_invsqrt(const v_float64x2& x)
{ return v_float64x2(vec_rsqrt(x.val)); }
#define OPENCV_HAL_IMPL_VSX_MULADD(_Tpvec) \
inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \
inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); } \
inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }
OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
// TODO: exp, log, sin, cos
/** Absolute values **/
inline v_uint8x16 v_abs(const v_int8x16& x)
{ return v_uint8x16(vec_uchar16_c(vec_abs(x.val))); }
inline v_uint16x8 v_abs(const v_int16x8& x)
{ return v_uint16x8(vec_ushort8_c(vec_abs(x.val))); }
inline v_uint32x4 v_abs(const v_int32x4& x)
{ return v_uint32x4(vec_uint4_c(vec_abs(x.val))); }
inline v_float32x4 v_abs(const v_float32x4& x)
{ return v_float32x4(vec_abs(x.val)); }
inline v_float64x2 v_abs(const v_float64x2& x)
{ return v_float64x2(vec_abs(x.val)); }
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec2(cast(intrin(a.val, b.val))); }
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int8x16, v_uint8x16, vec_uchar16_c, v_absdiff, vec_absd)
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int16x8, v_uint16x8, vec_ushort8_c, v_absdiff, vec_absd)
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int32x4, v_uint32x4, vec_uint4_c, v_absdiff, vec_absd)
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int64x2, v_uint64x2, vec_udword2_c, v_absdiff, vec_absd)
////////// Conversions /////////
/** Rounding **/
inline v_int32x4 v_round(const v_float32x4& a)
{ return v_int32x4(vec_cts(vec_round(a.val))); }
inline v_int32x4 v_round(const v_float64x2& a)
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_round(a.val)), vec_int4_z)); }
inline v_int32x4 v_floor(const v_float32x4& a)
{ return v_int32x4(vec_cts(vec_floor(a.val))); }
inline v_int32x4 v_floor(const v_float64x2& a)
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_floor(a.val)), vec_int4_z)); }
inline v_int32x4 v_ceil(const v_float32x4& a)
{ return v_int32x4(vec_cts(vec_ceil(a.val))); }
inline v_int32x4 v_ceil(const v_float64x2& a)
{ return v_int32x4(vec_mergesqo(vec_ctso(vec_ceil(a.val)), vec_int4_z)); }
inline v_int32x4 v_trunc(const v_float32x4& a)
{ return v_int32x4(vec_cts(a.val)); }
inline v_int32x4 v_trunc(const v_float64x2& a)
{ return v_int32x4(vec_mergesqo(vec_ctso(a.val), vec_int4_z)); }
/** To float **/
inline v_float32x4 v_cvt_f32(const v_int32x4& a)
{ return v_float32x4(vec_ctf(a.val)); }
inline v_float32x4 v_cvt_f32(const v_float64x2& a)
{ return v_float32x4(vec_mergesqo(vec_cvfo(a.val), vec_float4_z)); }
inline v_float64x2 v_cvt_f64(const v_int32x4& a)
{ return v_float64x2(vec_ctdo(vec_mergeh(a.val, a.val))); }
inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
{ return v_float64x2(vec_ctdo(vec_mergel(a.val, a.val))); }
inline v_float64x2 v_cvt_f64(const v_float32x4& a)
{ return v_float64x2(vec_cvfo(vec_mergeh(a.val, a.val))); }
inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
{ return v_float64x2(vec_cvfo(vec_mergel(a.val, a.val))); }
/** Reinterpret **/
/** defined above with the load and store operations **/
////////// Matrix operations /////////
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{ return v_int32x4(vec_msum(a.val, b.val, vec_int4_z)); }
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
const v_float32x4& m1, const v_float32x4& m2,
const v_float32x4& m3)
{
const vec_float4 v0 = vec_splat(v.val, 0);
const vec_float4 v1 = vec_splat(v.val, 1);
const vec_float4 v2 = vec_splat(v.val, 2);
VSX_UNUSED(const vec_float4) v3 = vec_splat(v.val, 3);
return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, vec_mul(v3, m3.val)))));
}
inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
const v_float32x4& m1, const v_float32x4& m2,
const v_float32x4& a)
{
const vec_float4 v0 = vec_splat(v.val, 0);
const vec_float4 v1 = vec_splat(v.val, 1);
const vec_float4 v2 = vec_splat(v.val, 2);
return v_float32x4(vec_madd(v0, m0.val, vec_madd(v1, m1.val, vec_madd(v2, m2.val, a.val))));
}
#define OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(_Tpvec, _Tpvec2) \
inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
const _Tpvec& a2, const _Tpvec& a3, \
_Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \
{ \
_Tpvec2 a02 = vec_mergeh(a0.val, a2.val); \
_Tpvec2 a13 = vec_mergeh(a1.val, a3.val); \
b0.val = vec_mergeh(a02, a13); \
b1.val = vec_mergel(a02, a13); \
a02 = vec_mergel(a0.val, a2.val); \
a13 = vec_mergel(a1.val, a3.val); \
b2.val = vec_mergeh(a02, a13); \
b3.val = vec_mergel(a02, a13); \
}
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4)
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)
//! @name Check SIMD support
//! @{
//! @brief Check CPU capability of SIMD operation
static inline bool hasSIMD128()
{
return (CV_CPU_HAS_SUPPORT_VSX) ? true : false;
}
//! @}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
}
#endif // OPENCV_HAL_VSX_HPP
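
These VSX types back OpenCV's portable 128-bit universal intrinsics, so user code never touches the vec_* builtins directly. A hedged sketch of the portable surface (the same source compiles against whichever SSE, NEON, or VSX backend intrin.hpp selects):

#include <cstdio>
#include "opencv2/core/hal/intrin.hpp"

int main()
{
#if CV_SIMD128
    float a[4] = {1.f, 2.f, 3.f, 4.f};
    float b[4] = {10.f, 20.f, 30.f, 40.f};
    // Element-wise add; on VSX this resolves to OPENCV_HAL_IMPL_VSX_BIN_OP(+, ...).
    cv::v_float32x4 vc = cv::v_load(a) + cv::v_load(b);
    float c[4];
    cv::v_store(c, vc);
    std::printf("%g %g %g %g\n", c[0], c[1], c[2], c[3]); // 11 22 33 44
#endif
    return 0;
}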


@ -0,0 +1,195 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2015, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_IPPASYNC_HPP
#define OPENCV_CORE_IPPASYNC_HPP
#ifdef HAVE_IPP_A
#include "opencv2/core.hpp"
#include <ipp_async_op.h>
#include <ipp_async_accel.h>
namespace cv
{
namespace hpp
{
/** @addtogroup core_ipp
This section describes conversion between OpenCV and [Intel&reg; IPP Asynchronous
C/C++](http://software.intel.com/en-us/intel-ipp-preview) library. [Getting Started
Guide](http://registrationcenter.intel.com/irc_nas/3727/ipp_async_get_started.htm) helps you
install the library and configure the header and library build paths.
*/
//! @{
//! convert OpenCV data type to hppDataType
inline int toHppType(const int cvType)
{
int depth = CV_MAT_DEPTH(cvType);
int hppType = depth == CV_8U ? HPP_DATA_TYPE_8U :
depth == CV_16U ? HPP_DATA_TYPE_16U :
depth == CV_16S ? HPP_DATA_TYPE_16S :
depth == CV_32S ? HPP_DATA_TYPE_32S :
depth == CV_32F ? HPP_DATA_TYPE_32F :
depth == CV_64F ? HPP_DATA_TYPE_64F : -1;
CV_Assert( hppType >= 0 );
return hppType;
}
//! convert hppDataType to OpenCV data type
inline int toCvType(const int hppType)
{
int cvType = hppType == HPP_DATA_TYPE_8U ? CV_8U :
hppType == HPP_DATA_TYPE_16U ? CV_16U :
hppType == HPP_DATA_TYPE_16S ? CV_16S :
hppType == HPP_DATA_TYPE_32S ? CV_32S :
hppType == HPP_DATA_TYPE_32F ? CV_32F :
hppType == HPP_DATA_TYPE_64F ? CV_64F : -1;
CV_Assert( cvType >= 0 );
return cvType;
}
/** @brief Convert hppiMatrix to Mat.
This function allocates and initializes a new matrix (if needed) with the same size and type as the
input matrix. Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
@param src input hppiMatrix.
@param dst output matrix.
@param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
@param cn number of channels.
*/
inline void copyHppToMat(hppiMatrix* src, Mat& dst, hppAccel accel, int cn)
{
hppDataType type;
hpp32u width, height;
hppStatus sts;
if (src == NULL)
return dst.release();
sts = hppiInquireMatrix(src, &type, &width, &height);
CV_Assert( sts == HPP_STATUS_NO_ERROR);
int matType = CV_MAKETYPE(toCvType(type), cn);
CV_Assert(width%cn == 0);
width /= cn;
dst.create((int)height, (int)width, (int)matType);
size_t newSize = (size_t)(height*(hpp32u)(dst.step));
sts = hppiGetMatrixData(accel,src,(hpp32u)(dst.step),dst.data,&newSize);
CV_Assert( sts == HPP_STATUS_NO_ERROR);
}
/** @brief Create Mat from hppiMatrix.
This function allocates and initializes the Mat that has the same size and type as input matrix.
Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
@param src input hppiMatrix.
@param accel accelerator instance (see hpp::getHpp for the list of acceleration framework types).
@param cn number of channels.
@sa howToUseIPPAconversion, hpp::copyHppToMat, hpp::getHpp.
*/
inline Mat getMat(hppiMatrix* src, hppAccel accel, int cn)
{
Mat dst;
copyHppToMat(src, dst, accel, cn);
return dst;
}
/** @brief Create hppiMatrix from Mat.
This function allocates and initializes an hppiMatrix with the same size and type as the input
matrix and returns the hppiMatrix*.
If you want to use zero-copy for GPU you need 4KB-aligned matrix data. See details
[hppiCreateSharedMatrix](http://software.intel.com/ru-ru/node/501697).
Supports CV_8U, CV_16U, CV_16S, CV_32S, CV_32F, CV_64F.
@note The hppiMatrix points to the image buffer in system memory, i.e. to src.data. Control
the lifetime of the matrix and do not change its data unless there is a special need.
@param src input matrix.
@param accel accelerator instance. Supports type:
- **HPP_ACCEL_TYPE_CPU** - accelerated by optimized CPU instructions.
- **HPP_ACCEL_TYPE_GPU** - accelerated by GPU programmable units or fixed-function
accelerators.
- **HPP_ACCEL_TYPE_ANY** - any acceleration or no acceleration available.
@sa howToUseIPPAconversion, hpp::getMat
*/
inline hppiMatrix* getHpp(const Mat& src, hppAccel accel)
{
int htype = toHppType(src.type());
int cn = src.channels();
CV_Assert(src.data);
hppAccelType accelType = hppQueryAccelType(accel);
if (accelType!=HPP_ACCEL_TYPE_CPU)
{
hpp32u pitch, size;
hppQueryMatrixAllocParams(accel, src.cols*cn, src.rows, htype, &pitch, &size);
if (pitch!=0 && size!=0)
if ((size_t)(src.data)%4096==0 && pitch==(hpp32u)(src.step)) // size_t avoids pointer truncation on 64-bit
{
return hppiCreateSharedMatrix(htype, src.cols*cn, src.rows, src.data, pitch, size);
}
}
return hppiCreateMatrix(htype, src.cols*cn, src.rows, src.data, (hpp32s)(src.step));
}
//! @}
}}
#endif
#endif
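
A hedged round-trip sketch for the helpers above. hppCreateInstance, hppiFreeMatrix, and hppDeleteInstance come from the Intel IPP Asynchronous headers (ipp_async_accel.h / ipp_async_op.h), not from this file, and the whole example only compiles when HAVE_IPP_A is defined:

#ifdef HAVE_IPP_A
#include "opencv2/core/ippasync.hpp"

void roundTrip(const cv::Mat& image)
{
    hppAccel accel = 0;
    hppStatus sts = hppCreateInstance(HPP_ACCEL_TYPE_ANY, 0, &accel);
    CV_Assert(sts == HPP_STATUS_NO_ERROR);
    // Wrap the Mat as an hppiMatrix (shared zero-copy if the data is 4KB-aligned) ...
    hppiMatrix* src = cv::hpp::getHpp(image, accel);
    // ... then copy it back into a freshly allocated Mat.
    cv::Mat result = cv::hpp::getMat(src, accel, image.channels());
    CV_Assert(cv::norm(image, result, cv::NORM_INF) == 0);
    hppiFreeMatrix(src);
    hppDeleteInstance(accel);
}
#endif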

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,128 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_HAL_NEON_UTILS_HPP
#define OPENCV_HAL_NEON_UTILS_HPP
#include "opencv2/core/cvdef.h"
//! @addtogroup core_utils_neon
//! @{
#if CV_NEON
inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
{
static int32x2_t v_sign = vdup_n_s32(1 << 31),
v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));
int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));
return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
}
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
{
static int32x4_t v_sign = vdupq_n_s32(1 << 31),
v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));
return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
}
inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
{
static float32x2_t v_05 = vdup_n_f32(0.5f);
return vcvt_u32_f32(vadd_f32(v, v_05));
}
inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
{
static float32x4_t v_05 = vdupq_n_f32(0.5f);
return vcvtq_u32_f32(vaddq_f32(v, v_05));
}
inline float32x4_t cv_vrecpq_f32(float32x4_t val)
{
float32x4_t reciprocal = vrecpeq_f32(val);
reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
return reciprocal;
}
inline float32x2_t cv_vrecp_f32(float32x2_t val)
{
float32x2_t reciprocal = vrecpe_f32(val);
reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
reciprocal = vmul_f32(vrecps_f32(val, reciprocal), reciprocal);
return reciprocal;
}
inline float32x4_t cv_vrsqrtq_f32(float32x4_t val)
{
float32x4_t e = vrsqrteq_f32(val);
e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(e, e), val), e);
return e;
}
inline float32x2_t cv_vrsqrt_f32(float32x2_t val)
{
float32x2_t e = vrsqrte_f32(val);
e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
e = vmul_f32(vrsqrts_f32(vmul_f32(e, e), val), e);
return e;
}
inline float32x4_t cv_vsqrtq_f32(float32x4_t val)
{
return cv_vrecpq_f32(cv_vrsqrtq_f32(val));
}
inline float32x2_t cv_vsqrt_f32(float32x2_t val)
{
return cv_vrecp_f32(cv_vrsqrt_f32(val));
}
#endif
//! @}
#endif // OPENCV_HAL_NEON_UTILS_HPP
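
A hedged sketch of the rounding helpers above (NEON builds only; vld1q_f32 and vst1q_s32 are standard arm_neon.h intrinsics pulled in via cvdef.h):

#include <cstdio>
#include <cstdint>
#include "opencv2/core/hal/neon_utils.hpp"

int main()
{
#if CV_NEON
    // cv_vrndq_s32_f32 adds a sign-matched 0.5 before the truncating convert,
    // i.e. it rounds half away from zero.
    float in[4] = {1.4f, -1.4f, 2.5f, -2.5f};
    int32_t out[4];
    vst1q_s32(out, cv_vrndq_s32_f32(vld1q_f32(in)));
    std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]); // 1 -1 3 -3
#endif
    return 0;
}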


@ -0,0 +1,842 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OPENCL_HPP
#define OPENCV_OPENCL_HPP
#include "opencv2/core.hpp"
namespace cv { namespace ocl {
//! @addtogroup core_opencl
//! @{
CV_EXPORTS_W bool haveOpenCL();
CV_EXPORTS_W bool useOpenCL();
CV_EXPORTS_W bool haveAmdBlas();
CV_EXPORTS_W bool haveAmdFft();
CV_EXPORTS_W void setUseOpenCL(bool flag);
CV_EXPORTS_W void finish();
CV_EXPORTS bool haveSVM();
class CV_EXPORTS Context;
class CV_EXPORTS Device;
class CV_EXPORTS Kernel;
class CV_EXPORTS Program;
class CV_EXPORTS ProgramSource;
class CV_EXPORTS Queue;
class CV_EXPORTS PlatformInfo;
class CV_EXPORTS Image2D;
class CV_EXPORTS Device
{
public:
Device();
explicit Device(void* d);
Device(const Device& d);
Device& operator = (const Device& d);
~Device();
void set(void* d);
enum
{
TYPE_DEFAULT = (1 << 0),
TYPE_CPU = (1 << 1),
TYPE_GPU = (1 << 2),
TYPE_ACCELERATOR = (1 << 3),
TYPE_DGPU = TYPE_GPU + (1 << 16),
TYPE_IGPU = TYPE_GPU + (1 << 17),
TYPE_ALL = 0xFFFFFFFF
};
String name() const;
String extensions() const;
bool isExtensionSupported(const String& extensionName) const;
String version() const;
String vendorName() const;
String OpenCL_C_Version() const;
String OpenCLVersion() const;
int deviceVersionMajor() const;
int deviceVersionMinor() const;
String driverVersion() const;
void* ptr() const;
int type() const;
int addressBits() const;
bool available() const;
bool compilerAvailable() const;
bool linkerAvailable() const;
enum
{
FP_DENORM=(1 << 0),
FP_INF_NAN=(1 << 1),
FP_ROUND_TO_NEAREST=(1 << 2),
FP_ROUND_TO_ZERO=(1 << 3),
FP_ROUND_TO_INF=(1 << 4),
FP_FMA=(1 << 5),
FP_SOFT_FLOAT=(1 << 6),
FP_CORRECTLY_ROUNDED_DIVIDE_SQRT=(1 << 7)
};
int doubleFPConfig() const;
int singleFPConfig() const;
int halfFPConfig() const;
bool endianLittle() const;
bool errorCorrectionSupport() const;
enum
{
EXEC_KERNEL=(1 << 0),
EXEC_NATIVE_KERNEL=(1 << 1)
};
int executionCapabilities() const;
size_t globalMemCacheSize() const;
enum
{
NO_CACHE=0,
READ_ONLY_CACHE=1,
READ_WRITE_CACHE=2
};
int globalMemCacheType() const;
int globalMemCacheLineSize() const;
size_t globalMemSize() const;
size_t localMemSize() const;
enum
{
NO_LOCAL_MEM=0,
LOCAL_IS_LOCAL=1,
LOCAL_IS_GLOBAL=2
};
int localMemType() const;
bool hostUnifiedMemory() const;
bool imageSupport() const;
bool imageFromBufferSupport() const;
uint imagePitchAlignment() const;
uint imageBaseAddressAlignment() const;
/// deprecated, use isExtensionSupported() method (probably with "cl_khr_subgroups" value)
bool intelSubgroupsSupport() const;
size_t image2DMaxWidth() const;
size_t image2DMaxHeight() const;
size_t image3DMaxWidth() const;
size_t image3DMaxHeight() const;
size_t image3DMaxDepth() const;
size_t imageMaxBufferSize() const;
size_t imageMaxArraySize() const;
enum
{
UNKNOWN_VENDOR=0,
VENDOR_AMD=1,
VENDOR_INTEL=2,
VENDOR_NVIDIA=3
};
int vendorID() const;
// FIXIT
// dev.isAMD() doesn't work for OpenCL CPU devices from AMD OpenCL platform.
// This method should use platform name instead of vendor name.
// After fix restore code in arithm.cpp: ocl_compare()
inline bool isAMD() const { return vendorID() == VENDOR_AMD; }
inline bool isIntel() const { return vendorID() == VENDOR_INTEL; }
inline bool isNVidia() const { return vendorID() == VENDOR_NVIDIA; }
int maxClockFrequency() const;
int maxComputeUnits() const;
int maxConstantArgs() const;
size_t maxConstantBufferSize() const;
size_t maxMemAllocSize() const;
size_t maxParameterSize() const;
int maxReadImageArgs() const;
int maxWriteImageArgs() const;
int maxSamplers() const;
size_t maxWorkGroupSize() const;
int maxWorkItemDims() const;
void maxWorkItemSizes(size_t*) const;
int memBaseAddrAlign() const;
int nativeVectorWidthChar() const;
int nativeVectorWidthShort() const;
int nativeVectorWidthInt() const;
int nativeVectorWidthLong() const;
int nativeVectorWidthFloat() const;
int nativeVectorWidthDouble() const;
int nativeVectorWidthHalf() const;
int preferredVectorWidthChar() const;
int preferredVectorWidthShort() const;
int preferredVectorWidthInt() const;
int preferredVectorWidthLong() const;
int preferredVectorWidthFloat() const;
int preferredVectorWidthDouble() const;
int preferredVectorWidthHalf() const;
size_t printfBufferSize() const;
size_t profilingTimerResolution() const;
static const Device& getDefault();
protected:
struct Impl;
Impl* p;
};
class CV_EXPORTS Context
{
public:
Context();
explicit Context(int dtype);
~Context();
Context(const Context& c);
Context& operator = (const Context& c);
bool create();
bool create(int dtype);
size_t ndevices() const;
const Device& device(size_t idx) const;
Program getProg(const ProgramSource& prog,
const String& buildopt, String& errmsg);
void unloadProg(Program& prog);
static Context& getDefault(bool initialize = true);
void* ptr() const;
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
bool useSVM() const;
void setUseSVM(bool enabled);
struct Impl;
inline Impl* getImpl() const { return (Impl*)p; }
//protected:
Impl* p;
};
class CV_EXPORTS Platform
{
public:
Platform();
~Platform();
Platform(const Platform& p);
Platform& operator = (const Platform& p);
void* ptr() const;
static Platform& getDefault();
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
protected:
struct Impl;
Impl* p;
};
/** @brief Attaches OpenCL context to OpenCV
@note
OpenCV will check if available OpenCL platform has platformName name, then assign context to
OpenCV and call `clRetainContext` function. The deviceID device will be used as target device and
new command queue will be created.
@param platformName name of OpenCL platform to attach, this string is used to check if platform is available to OpenCV at runtime
@param platformID ID of platform attached context was created for
@param context OpenCL context to be attached to OpenCV
@param deviceID ID of device, must be created from attached context
*/
CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID);
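// Usage sketch (editor's note, not part of the upstream header). platformID,
// clCtx and clDeviceID are OpenCL handles the caller already owns, and the
// platform name must match what the runtime reports:
//
//   cv::ocl::attachContext("NVIDIA CUDA", platformID, clCtx, clDeviceID);
//   CV_Assert(cv::ocl::useOpenCL());  // OpenCV now targets the attached context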
/** @brief Convert OpenCL buffer to UMat
@note
OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV. Memory
content is not copied from `clBuffer` to UMat. Instead, buffer handle assigned to UMat and
`clRetainMemObject` is called.
@param cl_mem_buffer source clBuffer handle
@param step num of bytes in single row
@param rows number of rows
@param cols number of cols
@param type OpenCV type of image
@param dst destination UMat
*/
CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst);
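// Usage sketch (editor's note): wrap an existing cl_mem holding CV_8UC1 pixel
// rows. clBuffer, step, rows and cols are assumed to come from the caller:
//
//   cv::UMat u;
//   cv::ocl::convertFromBuffer(clBuffer, step, rows, cols, CV_8UC1, u);
//   // u aliases clBuffer (no copy); clRetainMemObject was called on the handle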
/** @brief Convert OpenCL image2d_t to UMat
@note
OpenCL `image2d_t` (cl_mem_image), should be compatible with OpenCV UMat formats. Memory content
is copied from image to UMat with `clEnqueueCopyImageToBuffer` function.
@param cl_mem_image source image2d_t handle
@param dst destination UMat
*/
CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
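// Usage sketch (editor's note). clImage is a caller-owned cl_mem image2d_t:
//
//   cv::UMat u;
//   cv::ocl::convertFromImage(clImage, u);  // content is copied, not aliased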
// TODO Move to internal header
void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
class CV_EXPORTS Queue
{
public:
Queue();
explicit Queue(const Context& c, const Device& d=Device());
~Queue();
Queue(const Queue& q);
Queue& operator = (const Queue& q);
bool create(const Context& c=Context(), const Device& d=Device());
void finish();
void* ptr() const;
static Queue& getDefault();
    /// @brief Returns an OpenCL command queue with profiling mode enabled

const Queue& getProfilingQueue() const;
struct Impl; friend struct Impl;
inline Impl* getImpl() const { return p; }
protected:
Impl* p;
};
class CV_EXPORTS KernelArg
{
public:
enum { LOCAL=1, READ_ONLY=2, WRITE_ONLY=4, READ_WRITE=6, CONSTANT=8, PTR_ONLY = 16, NO_SIZE=256 };
KernelArg(int _flags, UMat* _m, int wscale=1, int iwscale=1, const void* _obj=0, size_t _sz=0);
KernelArg();
static KernelArg Local() { return KernelArg(LOCAL, 0); }
static KernelArg PtrWriteOnly(const UMat& m)
{ return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); }
static KernelArg PtrReadOnly(const UMat& m)
{ return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); }
static KernelArg PtrReadWrite(const UMat& m)
{ return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); }
static KernelArg ReadWrite(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(READ_WRITE, (UMat*)&m, wscale, iwscale); }
static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(READ_WRITE+NO_SIZE, (UMat*)&m, wscale, iwscale); }
static KernelArg ReadOnly(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(READ_ONLY, (UMat*)&m, wscale, iwscale); }
static KernelArg WriteOnly(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(WRITE_ONLY, (UMat*)&m, wscale, iwscale); }
static KernelArg ReadOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(READ_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
static KernelArg WriteOnlyNoSize(const UMat& m, int wscale=1, int iwscale=1)
{ return KernelArg(WRITE_ONLY+NO_SIZE, (UMat*)&m, wscale, iwscale); }
static KernelArg Constant(const Mat& m);
template<typename _Tp> static KernelArg Constant(const _Tp* arr, size_t n)
{ return KernelArg(CONSTANT, 0, 1, 1, (void*)arr, n); }
int flags;
UMat* m;
const void* obj;
size_t sz;
int wscale, iwscale;
};
class CV_EXPORTS Kernel
{
public:
Kernel();
Kernel(const char* kname, const Program& prog);
Kernel(const char* kname, const ProgramSource& prog,
const String& buildopts = String(), String* errmsg=0);
~Kernel();
Kernel(const Kernel& k);
Kernel& operator = (const Kernel& k);
bool empty() const;
bool create(const char* kname, const Program& prog);
bool create(const char* kname, const ProgramSource& prog,
const String& buildopts, String* errmsg=0);
int set(int i, const void* value, size_t sz);
int set(int i, const Image2D& image2D);
int set(int i, const UMat& m);
int set(int i, const KernelArg& arg);
template<typename _Tp> int set(int i, const _Tp& value)
{ return set(i, &value, sizeof(value)); }
template<typename _Tp0>
Kernel& args(const _Tp0& a0)
{
set(0, a0); return *this;
}
template<typename _Tp0, typename _Tp1>
Kernel& args(const _Tp0& a0, const _Tp1& a1)
{
int i = set(0, a0); set(i, a1); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2)
{
int i = set(0, a0); i = set(i, a1); set(i, a2); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
const _Tp3& a3, const _Tp4& a4)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2);
i = set(i, a3); set(i, a4); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2,
typename _Tp3, typename _Tp4, typename _Tp5>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2,
const _Tp3& a3, const _Tp4& a4, const _Tp5& a5)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2);
i = set(i, a3); i = set(i, a4); set(i, a5); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
i = set(i, a4); i = set(i, a5); set(i, a6); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3);
i = set(i, a4); i = set(i, a5); i = set(i, a6); set(i, a7); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4);
i = set(i, a5); i = set(i, a6); i = set(i, a7); set(i, a8); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3, typename _Tp4,
typename _Tp5, typename _Tp6, typename _Tp7, typename _Tp8, typename _Tp9>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); set(i, a9); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); set(i, a10); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); set(i, a11); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
set(i, a12); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); set(i, a13); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13, typename _Tp14>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); i = set(i, a13); set(i, a14); return *this;
}
template<typename _Tp0, typename _Tp1, typename _Tp2, typename _Tp3,
typename _Tp4, typename _Tp5, typename _Tp6, typename _Tp7,
typename _Tp8, typename _Tp9, typename _Tp10, typename _Tp11, typename _Tp12,
typename _Tp13, typename _Tp14, typename _Tp15>
Kernel& args(const _Tp0& a0, const _Tp1& a1, const _Tp2& a2, const _Tp3& a3,
const _Tp4& a4, const _Tp5& a5, const _Tp6& a6, const _Tp7& a7,
const _Tp8& a8, const _Tp9& a9, const _Tp10& a10, const _Tp11& a11,
const _Tp12& a12, const _Tp13& a13, const _Tp14& a14, const _Tp15& a15)
{
int i = set(0, a0); i = set(i, a1); i = set(i, a2); i = set(i, a3); i = set(i, a4); i = set(i, a5);
i = set(i, a6); i = set(i, a7); i = set(i, a8); i = set(i, a9); i = set(i, a10); i = set(i, a11);
i = set(i, a12); i = set(i, a13); i = set(i, a14); set(i, a15); return *this;
}
/** @brief Run the OpenCL kernel.
@param dims the work problem dimensions. It is the length of globalsize and localsize. It can be either 1, 2 or 3.
@param globalsize work items for each dimension. It is not the final globalsize passed to
OpenCL. Each dimension will be adjusted to the nearest integer divisible by the corresponding
value in localsize. If localsize is NULL, it will still be adjusted depending on dims. The
adjusted values are greater than or equal to the original values.
@param localsize work-group size for each dimension.
    @param sync specifies whether to wait for the OpenCL computation to finish before returning.
@param q command queue
*/
bool run(int dims, size_t globalsize[],
size_t localsize[], bool sync, const Queue& q=Queue());
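    // Dispatch sketch (editor's note; "myKernel", prog, src and dst are
    // hypothetical names standing in for the caller's kernel and UMats):
    //
    //   cv::ocl::Kernel k("myKernel", prog);
    //   k.args(cv::ocl::KernelArg::ReadOnly(src), cv::ocl::KernelArg::WriteOnly(dst));
    //   size_t globalsize[2] = { (size_t)dst.cols, (size_t)dst.rows };
    //   bool ok = k.run(2, globalsize, NULL, false);  // async; globalsize is rounded up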
bool runTask(bool sync, const Queue& q=Queue());
    /** @brief Similar to a synchronized run() call, but returns the kernel execution time.
     * A separate OpenCL command queue may be used (with CL_QUEUE_PROFILING_ENABLE).
     * @return Execution time in nanoseconds, or a negative number on error
*/
int64 runProfiling(int dims, size_t globalsize[], size_t localsize[], const Queue& q=Queue());
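    // Timing variant, reusing k and globalsize from the sketch above
    // (editor's note):
    //
    //   int64 ns = k.runProfiling(2, globalsize, NULL);  // blocking; < 0 on error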
size_t workGroupSize() const;
size_t preferedWorkGroupSizeMultiple() const;
bool compileWorkGroupSize(size_t wsz[]) const;
size_t localMemSize() const;
void* ptr() const;
struct Impl;
protected:
Impl* p;
};
class CV_EXPORTS Program
{
public:
Program();
Program(const ProgramSource& src,
const String& buildflags, String& errmsg);
Program(const Program& prog);
Program& operator = (const Program& prog);
~Program();
bool create(const ProgramSource& src,
const String& buildflags, String& errmsg);
void* ptr() const;
/**
* @brief Query device-specific program binary.
*
* Returns RAW OpenCL executable binary without additional attachments.
*
* @sa ProgramSource::fromBinary
*
* @param[out] binary output buffer
*/
void getBinary(std::vector<char>& binary) const;
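    // Binary-caching sketch (editor's note; file I/O elided, prog is a built
    // cv::ocl::Program):
    //
    //   std::vector<char> bin;
    //   prog.getBinary(bin);   // raw device-specific binary
    //   // persist bin, then on a later run rebuild via ProgramSource::fromBinary()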
struct Impl; friend struct Impl;
inline Impl* getImpl() const { return (Impl*)p; }
protected:
Impl* p;
public:
#ifndef OPENCV_REMOVE_DEPRECATED_API
// TODO Remove this
CV_DEPRECATED bool read(const String& buf, const String& buildflags); // removed, use ProgramSource instead
CV_DEPRECATED bool write(String& buf) const; // removed, use getBinary() method instead (RAW OpenCL binary)
CV_DEPRECATED const ProgramSource& source() const; // implementation removed
CV_DEPRECATED String getPrefix() const; // deprecated, implementation replaced
CV_DEPRECATED static String getPrefix(const String& buildflags); // deprecated, implementation replaced
#endif
};
class CV_EXPORTS ProgramSource
{
public:
typedef uint64 hash_t; // deprecated
ProgramSource();
explicit ProgramSource(const String& module, const String& name, const String& codeStr, const String& codeHash);
explicit ProgramSource(const String& prog); // deprecated
explicit ProgramSource(const char* prog); // deprecated
~ProgramSource();
ProgramSource(const ProgramSource& prog);
ProgramSource& operator = (const ProgramSource& prog);
const String& source() const; // deprecated
hash_t hash() const; // deprecated
/** @brief Describe OpenCL program binary.
* Do not call clCreateProgramWithBinary() and/or clBuildProgram().
*
     * The caller must guarantee that the binary buffer outlives the ProgramSource object (and any of its copies).
*
* This kind of binary is not portable between platforms in general - it is specific to OpenCL vendor / device / driver version.
*
* @param module name of program owner module
* @param name unique name of program (module+name is used as key for OpenCL program caching)
* @param binary buffer address. See buffer lifetime requirement in description.
* @param size buffer size
* @param buildOptions additional program-related build options passed to clBuildProgram()
* @return created ProgramSource object
*/
static ProgramSource fromBinary(const String& module, const String& name,
const unsigned char* binary, const size_t size,
const cv::String& buildOptions = cv::String());
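    // Continuing the caching sketch from getBinary() above (editor's note;
    // "mymodule"/"mykernels" are hypothetical cache keys):
    //
    //   cv::ocl::ProgramSource src = cv::ocl::ProgramSource::fromBinary(
    //       "mymodule", "mykernels", (const unsigned char*)bin.data(), bin.size());
    //   cv::String errmsg;
    //   cv::ocl::Program p = cv::ocl::Context::getDefault().getProg(src, "", errmsg);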
/** @brief Describe OpenCL program in SPIR format.
* Do not call clCreateProgramWithBinary() and/or clBuildProgram().
*
* Supports SPIR 1.2 by default (pass '-spir-std=X.Y' in buildOptions to override this behavior)
*
     * The caller must guarantee that the binary buffer outlives the ProgramSource object (and any of its copies).
*
* Programs in this format are portable between OpenCL implementations with 'khr_spir' extension:
* https://www.khronos.org/registry/OpenCL/sdk/2.0/docs/man/xhtml/cl_khr_spir.html
* (but they are not portable between different platforms: 32-bit / 64-bit)
*
     * Note: these programs can't use vendor-specific extensions, such as 'cl_intel_subgroups'.
*
* @param module name of program owner module
* @param name unique name of program (module+name is used as key for OpenCL program caching)
* @param binary buffer address. See buffer lifetime requirement in description.
* @param size buffer size
* @param buildOptions additional program-related build options passed to clBuildProgram()
* (these options are added automatically: '-x spir' and '-spir-std=1.2')
* @return created ProgramSource object.
*/
static ProgramSource fromSPIR(const String& module, const String& name,
const unsigned char* binary, const size_t size,
const cv::String& buildOptions = cv::String());
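    // Same call shape as fromBinary() (editor's note; spirData/spirSize are a
    // caller-provided SPIR 1.2 blob and its size):
    //
    //   cv::ocl::ProgramSource spir = cv::ocl::ProgramSource::fromSPIR(
    //       "mymodule", "mykernels", spirData, spirSize);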
//OpenCL 2.1+ only
//static Program fromSPIRV(const String& module, const String& name,
// const unsigned char* binary, const size_t size,
// const cv::String& buildOptions = cv::String());
struct Impl; friend struct Impl;
inline Impl* getImpl() const { return (Impl*)p; }
protected:
Impl* p;
};
class CV_EXPORTS PlatformInfo
{
public:
PlatformInfo();
explicit PlatformInfo(void* id);
~PlatformInfo();
PlatformInfo(const PlatformInfo& i);
PlatformInfo& operator =(const PlatformInfo& i);
String name() const;
String vendor() const;
String version() const;
int deviceNumber() const;
void getDevice(Device& device, int d) const;
protected:
struct Impl;
Impl* p;
};
CV_EXPORTS const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf);
CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS const char* memopTypeToStr(int t);
CV_EXPORTS const char* vecopTypeToStr(int t);
CV_EXPORTS const char* getOpenCLErrorString(int errorCode);
CV_EXPORTS String kernelToStr(InputArray _kernel, int ddepth = -1, const char * name = NULL);
CV_EXPORTS void getPlatfomsInfo(std::vector<PlatformInfo>& platform_info);
enum OclVectorStrategy
{
    // each matrix has its own vector width
OCL_VECTOR_OWN = 0,
    // all matrices use the maximal vector width found among them
// (useful for cases when matrices have different data types)
OCL_VECTOR_MAX = 1,
// default strategy
OCL_VECTOR_DEFAULT = OCL_VECTOR_OWN
};
CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
CV_EXPORTS int checkOptimalVectorWidth(const int *vectorWidths,
InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
// with OCL_VECTOR_MAX strategy
CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
class CV_EXPORTS Image2D
{
public:
Image2D();
/**
@param src UMat object from which to get image properties and data
@param norm flag to enable the use of normalized channel data types
@param alias flag indicating that the image should alias the src UMat. If true, changes to the
image or src will be reflected in both objects.
*/
explicit Image2D(const UMat &src, bool norm = false, bool alias = false);
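    // Usage sketch (editor's note; umat is an existing cv::UMat and kernel a
    // cv::ocl::Kernel):
    //
    //   if (cv::ocl::Image2D::isFormatSupported(CV_32F, 1, false))
    //   {
    //       cv::ocl::Image2D img(umat);  // copies umat into an image2d_t
    //       kernel.set(0, img);
    //   }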
Image2D(const Image2D & i);
~Image2D();
Image2D & operator = (const Image2D & i);
/** Indicates if creating an aliased image should succeed.
Depends on the underlying platform and the dimensions of the UMat.
*/
static bool canCreateAlias(const UMat &u);
/** Indicates if the image format is supported.
*/
static bool isFormatSupported(int depth, int cn, bool norm);
void* ptr() const;
protected:
struct Impl;
Impl* p;
};
class CV_EXPORTS Timer
{
public:
Timer(const Queue& q);
~Timer();
void start();
void stop();
    uint64 durationNS() const; //!< duration in nanoseconds
protected:
struct Impl;
Impl* const p;
private:
Timer(const Timer&); // disabled
Timer& operator=(const Timer&); // disabled
};
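// Timer usage sketch (editor's note):
//
//   cv::ocl::Timer t(cv::ocl::Queue::getDefault());
//   t.start();
//   // ... enqueue OpenCL work on the queue ...
//   t.stop();
//   uint64 ns = t.durationNS();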
CV_EXPORTS MatAllocator* getOpenCLAllocator();
#ifdef __OPENCV_BUILD
namespace internal {
CV_EXPORTS bool isOpenCLForced();
#define OCL_FORCE_CHECK(condition) (cv::ocl::internal::isOpenCLForced() || (condition))
CV_EXPORTS bool isPerformanceCheckBypassed();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
CV_EXPORTS bool isCLBuffer(UMat& u);
} // namespace internal
#endif
//! @}
}}
#endif
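The declarations above are enough for a quick capability probe. A minimal sketch (editor's addition, not part of the commit) that checks for a usable device and prints it:

#include <iostream>
#include <opencv2/core/ocl.hpp>

int main()
{
    if (!cv::ocl::haveOpenCL() || !cv::ocl::useOpenCL())
    {
        std::cout << "OpenCL is not available" << std::endl;
        return 0;
    }
    const cv::ocl::Device& dev = cv::ocl::Device::getDefault();
    std::cout << dev.name() << " (" << dev.version() << ")" << std::endl;
    return 0;
}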

View File

@ -0,0 +1,69 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_OPENCL_GENBASE_HPP
#define OPENCV_OPENCL_GENBASE_HPP
//! @cond IGNORED
namespace cv {
namespace ocl {
class ProgramSource;
namespace internal {
struct CV_EXPORTS ProgramEntry
{
const char* module;
const char* name;
const char* programCode;
const char* programHash;
ProgramSource* pProgramSource;
operator ProgramSource& () const;
};
} } } // namespace
//! @endcond
#endif

View File

@ -0,0 +1,75 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#ifndef OPENCV_CORE_OPENCL_DEFS_HPP
#define OPENCV_CORE_OPENCL_DEFS_HPP
#include "opencv2/core/utility.hpp"
#include "cvconfig.h"
namespace cv { namespace ocl {
#ifdef HAVE_OPENCL
/// Similar to useOpenCL(), but doesn't try to load the OpenCL runtime or create an OpenCL context
CV_EXPORTS bool isOpenCLActivated();
#else
static inline bool isOpenCLActivated() { return false; }
#endif
}} // namespace
//#define CV_OPENCL_RUN_ASSERT
#ifdef HAVE_OPENCL
#ifdef CV_OPENCL_RUN_VERBOSE
#define CV_OCL_RUN_(condition, func, ...) \
{ \
if (cv::ocl::isOpenCLActivated() && (condition) && func) \
{ \
printf("%s: OpenCL implementation is running\n", CV_Func); \
fflush(stdout); \
CV_IMPL_ADD(CV_IMPL_OCL); \
return __VA_ARGS__; \
} \
else \
{ \
printf("%s: Plain implementation is running\n", CV_Func); \
fflush(stdout); \
} \
}
#elif defined CV_OPENCL_RUN_ASSERT
#define CV_OCL_RUN_(condition, func, ...) \
{ \
if (cv::ocl::isOpenCLActivated() && (condition)) \
{ \
if(func) \
{ \
CV_IMPL_ADD(CV_IMPL_OCL); \
} \
else \
{ \
CV_Error(cv::Error::StsAssert, #func); \
} \
return __VA_ARGS__; \
} \
}
#else
#define CV_OCL_RUN_(condition, func, ...) \
if (cv::ocl::isOpenCLActivated() && (condition) && func) \
{ \
CV_IMPL_ADD(CV_IMPL_OCL); \
return __VA_ARGS__; \
}
#endif
#else
#define CV_OCL_RUN_(condition, func, ...)
#endif
#define CV_OCL_RUN(condition, func) CV_OCL_RUN_(condition, func)
#endif // OPENCV_CORE_OPENCL_DEFS_HPP
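The CV_OCL_RUN macro above encodes OpenCV's transparent-dispatch pattern: try the OpenCL branch first, and fall through to the plain CPU code when OpenCL is inactive, the condition fails, or the OpenCL function returns false. A hedged sketch of an in-tree call site, where this internal header is already in effect and myFilter/ocl_myFilter are hypothetical names:

static bool ocl_myFilter(cv::InputArray src, cv::OutputArray dst);  // OpenCL path (hypothetical)

void myFilter(cv::InputArray src, cv::OutputArray dst)
{
    // expands to: if OpenCL is activated, src is a UMat, and ocl_myFilter() succeeds, return
    CV_OCL_RUN(src.isUMat(), ocl_myFilter(src, dst))

    src.getMat().copyTo(dst);  // plain CPU fallback (placeholder)
}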

View File

@ -0,0 +1,198 @@
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/core/ocl.hpp>
#ifndef DUMP_CONFIG_PROPERTY
#define DUMP_CONFIG_PROPERTY(...)
#endif
#ifndef DUMP_MESSAGE_STDOUT
#define DUMP_MESSAGE_STDOUT(...) do { std::cout << __VA_ARGS__ << std::endl; } while (false)
#endif
namespace cv {
namespace {
static std::string bytesToStringRepr(size_t value)
{
size_t b = value % 1024;
value /= 1024;
size_t kb = value % 1024;
value /= 1024;
size_t mb = value % 1024;
value /= 1024;
size_t gb = value;
std::ostringstream stream;
if (gb > 0)
stream << gb << " GB ";
if (mb > 0)
stream << mb << " MB ";
if (kb > 0)
stream << kb << " KB ";
if (b > 0)
stream << b << " B";
std::string s = stream.str();
    if (!s.empty() && s[s.size() - 1] == ' ')  // guard: value == 0 produces an empty string
s = s.substr(0, s.size() - 1);
return s;
}
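// Worked example (editor's note): bytesToStringRepr(1536) yields "1 KB 512 B",
// while bytesToStringRepr(2048) yields "2 KB" after the trailing space is trimmed.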
} // namespace
static void dumpOpenCLInformation()
{
using namespace cv::ocl;
try
{
if (!haveOpenCL() || !useOpenCL())
{
DUMP_MESSAGE_STDOUT("OpenCL is disabled");
DUMP_CONFIG_PROPERTY("cv_ocl", "disabled");
return;
}
std::vector<PlatformInfo> platforms;
cv::ocl::getPlatfomsInfo(platforms);
if (platforms.size() > 0)
{
DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
for (size_t i = 0; i < platforms.size(); i++)
{
const PlatformInfo* platform = &platforms[i];
DUMP_MESSAGE_STDOUT(" " << platform->name().c_str());
Device current_device;
for (int j = 0; j < platform->deviceNumber(); j++)
{
platform->getDevice(current_device, j);
                const char* deviceTypeStr = current_device.type() == Device::TYPE_CPU
                    ? "CPU"
                    : (current_device.type() == Device::TYPE_GPU
                        ? (current_device.hostUnifiedMemory() ? "iGPU" : "dGPU")
                        : "unknown");
DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << ": " << current_device.name().c_str() << " (" << current_device.version().c_str() << ")");
DUMP_CONFIG_PROPERTY( cv::format("cv_ocl_platform_%d_device_%d", (int)i, (int)j ),
cv::format("(Platform=%s)(Type=%s)(Name=%s)(Version=%s)",
platform->name().c_str(), deviceTypeStr, current_device.name().c_str(), current_device.version().c_str()) );
}
}
}
else
{
DUMP_MESSAGE_STDOUT("OpenCL is not available");
DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
return;
}
const Device& device = Device::getDefault();
if (!device.available())
CV_ErrorNoReturn(Error::OpenCLInitError, "OpenCL device is not available");
DUMP_MESSAGE_STDOUT("Current OpenCL device: ");
#if 0
DUMP_MESSAGE_STDOUT(" Platform = " << device.getPlatform().name());
DUMP_CONFIG_PROPERTY("cv_ocl_current_platformName", device.getPlatform().name());
#endif
    const char* deviceTypeStr = device.type() == Device::TYPE_CPU
        ? "CPU"
        : (device.type() == Device::TYPE_GPU
            ? (device.hostUnifiedMemory() ? "iGPU" : "dGPU")
            : "unknown");
DUMP_MESSAGE_STDOUT(" Type = " << deviceTypeStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceType", deviceTypeStr);
DUMP_MESSAGE_STDOUT(" Name = " << device.name());
DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceName", device.name());
DUMP_MESSAGE_STDOUT(" Version = " << device.version());
DUMP_CONFIG_PROPERTY("cv_ocl_current_deviceVersion", device.version());
DUMP_MESSAGE_STDOUT(" Driver version = " << device.driverVersion());
DUMP_CONFIG_PROPERTY("cv_ocl_current_driverVersion", device.driverVersion());
DUMP_MESSAGE_STDOUT(" Address bits = " << device.addressBits());
DUMP_CONFIG_PROPERTY("cv_ocl_current_addressBits", device.addressBits());
DUMP_MESSAGE_STDOUT(" Compute units = " << device.maxComputeUnits());
DUMP_CONFIG_PROPERTY("cv_ocl_current_maxComputeUnits", device.maxComputeUnits());
DUMP_MESSAGE_STDOUT(" Max work group size = " << device.maxWorkGroupSize());
DUMP_CONFIG_PROPERTY("cv_ocl_current_maxWorkGroupSize", device.maxWorkGroupSize());
std::string localMemorySizeStr = bytesToStringRepr(device.localMemSize());
DUMP_MESSAGE_STDOUT(" Local memory size = " << localMemorySizeStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_localMemSize", device.localMemSize());
std::string maxMemAllocSizeStr = bytesToStringRepr(device.maxMemAllocSize());
DUMP_MESSAGE_STDOUT(" Max memory allocation size = " << maxMemAllocSizeStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_maxMemAllocSize", device.maxMemAllocSize());
const char* doubleSupportStr = device.doubleFPConfig() > 0 ? "Yes" : "No";
DUMP_MESSAGE_STDOUT(" Double support = " << doubleSupportStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_haveDoubleSupport", device.doubleFPConfig() > 0);
const char* isUnifiedMemoryStr = device.hostUnifiedMemory() ? "Yes" : "No";
DUMP_MESSAGE_STDOUT(" Host unified memory = " << isUnifiedMemoryStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_hostUnifiedMemory", device.hostUnifiedMemory());
DUMP_MESSAGE_STDOUT(" Device extensions:");
String extensionsStr = device.extensions();
size_t pos = 0;
while (pos < extensionsStr.size())
{
size_t pos2 = extensionsStr.find(' ', pos);
if (pos2 == String::npos)
pos2 = extensionsStr.size();
if (pos2 > pos)
{
String extensionName = extensionsStr.substr(pos, pos2 - pos);
DUMP_MESSAGE_STDOUT(" " << extensionName);
}
pos = pos2 + 1;
}
DUMP_CONFIG_PROPERTY("cv_ocl_current_extensions", extensionsStr.c_str());
const char* haveAmdBlasStr = haveAmdBlas() ? "Yes" : "No";
DUMP_MESSAGE_STDOUT(" Has AMD Blas = " << haveAmdBlasStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdBlas", haveAmdBlas());
const char* haveAmdFftStr = haveAmdFft() ? "Yes" : "No";
DUMP_MESSAGE_STDOUT(" Has AMD Fft = " << haveAmdFftStr);
DUMP_CONFIG_PROPERTY("cv_ocl_current_AmdFft", haveAmdFft());
DUMP_MESSAGE_STDOUT(" Preferred vector width char = " << device.preferredVectorWidthChar());
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthChar", device.preferredVectorWidthChar());
DUMP_MESSAGE_STDOUT(" Preferred vector width short = " << device.preferredVectorWidthShort());
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthShort", device.preferredVectorWidthShort());
DUMP_MESSAGE_STDOUT(" Preferred vector width int = " << device.preferredVectorWidthInt());
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthInt", device.preferredVectorWidthInt());
DUMP_MESSAGE_STDOUT(" Preferred vector width long = " << device.preferredVectorWidthLong());
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthLong", device.preferredVectorWidthLong());
DUMP_MESSAGE_STDOUT(" Preferred vector width float = " << device.preferredVectorWidthFloat());
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthFloat", device.preferredVectorWidthFloat());
DUMP_MESSAGE_STDOUT(" Preferred vector width double = " << device.preferredVectorWidthDouble());
DUMP_CONFIG_PROPERTY("cv_ocl_current_preferredVectorWidthDouble", device.preferredVectorWidthDouble());
}
catch (...)
{
DUMP_MESSAGE_STDOUT("Exception. Can't dump OpenCL info");
DUMP_MESSAGE_STDOUT("OpenCL device not available");
DUMP_CONFIG_PROPERTY("cv_ocl", "not available");
}
}
#undef DUMP_MESSAGE_STDOUT
#undef DUMP_CONFIG_PROPERTY
} // namespace

View File

@ -0,0 +1,81 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef OPENCV_CORE_OPENCL_SVM_HPP
#define OPENCV_CORE_OPENCL_SVM_HPP
//
// Internal usage only (binary compatibility is not guaranteed)
//
#ifndef __OPENCV_BUILD
#error Internal header file
#endif
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
#include "runtime/opencl_core.hpp"
#include "runtime/opencl_svm_20.hpp"
#include "runtime/opencl_svm_hsa_extension.hpp"
namespace cv { namespace ocl { namespace svm {
struct SVMCapabilities
{
enum Value
{
SVM_COARSE_GRAIN_BUFFER = (1 << 0),
SVM_FINE_GRAIN_BUFFER = (1 << 1),
SVM_FINE_GRAIN_SYSTEM = (1 << 2),
SVM_ATOMICS = (1 << 3),
};
int value_;
SVMCapabilities(int capabilities = 0) : value_(capabilities) { }
operator int() const { return value_; }
inline bool isNoSVMSupport() const { return value_ == 0; }
inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; }
inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; }
inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; }
inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; }
};
CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context);
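// Internal usage sketch (editor's note; this header is not public API, and ctx
// is an existing cv::ocl::Context):
//
//   cv::ocl::svm::SVMCapabilities caps = cv::ocl::svm::getSVMCapabilitites(ctx);
//   if (caps.isSupportFineGrainBuffer())
//   {
//       // take the fine-grain SVM path
//   }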
struct SVMFunctions
{
clSVMAllocAMD_fn fn_clSVMAlloc;
clSVMFreeAMD_fn fn_clSVMFree;
clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
//clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
//clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;
inline SVMFunctions()
: fn_clSVMAlloc(NULL), fn_clSVMFree(NULL),
fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/
/*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL),
fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL)
{
// nothing
}
inline bool isValid() const
{
return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer &&
/*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy &&
fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap;
}
};
// The SVMFunctions instance must outlive the associated context
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context);
CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags);
}}} //namespace cv::ocl::svm
#endif
#endif // OPENCV_CORE_OPENCL_SVM_HPP
/* End of file. */

View File

@ -0,0 +1,714 @@
//
// AUTOGENERATED, DO NOT EDIT
//
#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
#error "Invalid usage"
#endif
// generated by parser_clamdblas.py
#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_
#define clAmdBlasCaxpy clAmdBlasCaxpy_
#define clAmdBlasCcopy clAmdBlasCcopy_
#define clAmdBlasCdotc clAmdBlasCdotc_
#define clAmdBlasCdotu clAmdBlasCdotu_
#define clAmdBlasCgbmv clAmdBlasCgbmv_
#define clAmdBlasCgemm clAmdBlasCgemm_
#define clAmdBlasCgemmEx clAmdBlasCgemmEx_
#define clAmdBlasCgemv clAmdBlasCgemv_
#define clAmdBlasCgemvEx clAmdBlasCgemvEx_
#define clAmdBlasCgerc clAmdBlasCgerc_
#define clAmdBlasCgeru clAmdBlasCgeru_
#define clAmdBlasChbmv clAmdBlasChbmv_
#define clAmdBlasChemm clAmdBlasChemm_
#define clAmdBlasChemv clAmdBlasChemv_
#define clAmdBlasCher clAmdBlasCher_
#define clAmdBlasCher2 clAmdBlasCher2_
#define clAmdBlasCher2k clAmdBlasCher2k_
#define clAmdBlasCherk clAmdBlasCherk_
#define clAmdBlasChpmv clAmdBlasChpmv_
#define clAmdBlasChpr clAmdBlasChpr_
#define clAmdBlasChpr2 clAmdBlasChpr2_
#define clAmdBlasCrotg clAmdBlasCrotg_
#define clAmdBlasCscal clAmdBlasCscal_
#define clAmdBlasCsrot clAmdBlasCsrot_
#define clAmdBlasCsscal clAmdBlasCsscal_
#define clAmdBlasCswap clAmdBlasCswap_
#define clAmdBlasCsymm clAmdBlasCsymm_
#define clAmdBlasCsyr2k clAmdBlasCsyr2k_
#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_
#define clAmdBlasCsyrk clAmdBlasCsyrk_
#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_
#define clAmdBlasCtbmv clAmdBlasCtbmv_
#define clAmdBlasCtbsv clAmdBlasCtbsv_
#define clAmdBlasCtpmv clAmdBlasCtpmv_
#define clAmdBlasCtpsv clAmdBlasCtpsv_
#define clAmdBlasCtrmm clAmdBlasCtrmm_
#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_
#define clAmdBlasCtrmv clAmdBlasCtrmv_
#define clAmdBlasCtrsm clAmdBlasCtrsm_
#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_
#define clAmdBlasCtrsv clAmdBlasCtrsv_
#define clAmdBlasDasum clAmdBlasDasum_
#define clAmdBlasDaxpy clAmdBlasDaxpy_
#define clAmdBlasDcopy clAmdBlasDcopy_
#define clAmdBlasDdot clAmdBlasDdot_
#define clAmdBlasDgbmv clAmdBlasDgbmv_
#define clAmdBlasDgemm clAmdBlasDgemm_
#define clAmdBlasDgemmEx clAmdBlasDgemmEx_
#define clAmdBlasDgemv clAmdBlasDgemv_
#define clAmdBlasDgemvEx clAmdBlasDgemvEx_
#define clAmdBlasDger clAmdBlasDger_
#define clAmdBlasDnrm2 clAmdBlasDnrm2_
#define clAmdBlasDrot clAmdBlasDrot_
#define clAmdBlasDrotg clAmdBlasDrotg_
#define clAmdBlasDrotm clAmdBlasDrotm_
#define clAmdBlasDrotmg clAmdBlasDrotmg_
#define clAmdBlasDsbmv clAmdBlasDsbmv_
#define clAmdBlasDscal clAmdBlasDscal_
#define clAmdBlasDspmv clAmdBlasDspmv_
#define clAmdBlasDspr clAmdBlasDspr_
#define clAmdBlasDspr2 clAmdBlasDspr2_
#define clAmdBlasDswap clAmdBlasDswap_
#define clAmdBlasDsymm clAmdBlasDsymm_
#define clAmdBlasDsymv clAmdBlasDsymv_
#define clAmdBlasDsymvEx clAmdBlasDsymvEx_
#define clAmdBlasDsyr clAmdBlasDsyr_
#define clAmdBlasDsyr2 clAmdBlasDsyr2_
#define clAmdBlasDsyr2k clAmdBlasDsyr2k_
#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_
#define clAmdBlasDsyrk clAmdBlasDsyrk_
#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_
#define clAmdBlasDtbmv clAmdBlasDtbmv_
#define clAmdBlasDtbsv clAmdBlasDtbsv_
#define clAmdBlasDtpmv clAmdBlasDtpmv_
#define clAmdBlasDtpsv clAmdBlasDtpsv_
#define clAmdBlasDtrmm clAmdBlasDtrmm_
#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_
#define clAmdBlasDtrmv clAmdBlasDtrmv_
#define clAmdBlasDtrsm clAmdBlasDtrsm_
#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_
#define clAmdBlasDtrsv clAmdBlasDtrsv_
#define clAmdBlasDzasum clAmdBlasDzasum_
#define clAmdBlasDznrm2 clAmdBlasDznrm2_
#define clAmdBlasGetVersion clAmdBlasGetVersion_
#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_
#define clAmdBlasSasum clAmdBlasSasum_
#define clAmdBlasSaxpy clAmdBlasSaxpy_
#define clAmdBlasScasum clAmdBlasScasum_
#define clAmdBlasScnrm2 clAmdBlasScnrm2_
#define clAmdBlasScopy clAmdBlasScopy_
#define clAmdBlasSdot clAmdBlasSdot_
#define clAmdBlasSetup clAmdBlasSetup_
#define clAmdBlasSgbmv clAmdBlasSgbmv_
#define clAmdBlasSgemm clAmdBlasSgemm_
#define clAmdBlasSgemmEx clAmdBlasSgemmEx_
#define clAmdBlasSgemv clAmdBlasSgemv_
#define clAmdBlasSgemvEx clAmdBlasSgemvEx_
#define clAmdBlasSger clAmdBlasSger_
#define clAmdBlasSnrm2 clAmdBlasSnrm2_
#define clAmdBlasSrot clAmdBlasSrot_
#define clAmdBlasSrotg clAmdBlasSrotg_
#define clAmdBlasSrotm clAmdBlasSrotm_
#define clAmdBlasSrotmg clAmdBlasSrotmg_
#define clAmdBlasSsbmv clAmdBlasSsbmv_
#define clAmdBlasSscal clAmdBlasSscal_
#define clAmdBlasSspmv clAmdBlasSspmv_
#define clAmdBlasSspr clAmdBlasSspr_
#define clAmdBlasSspr2 clAmdBlasSspr2_
#define clAmdBlasSswap clAmdBlasSswap_
#define clAmdBlasSsymm clAmdBlasSsymm_
#define clAmdBlasSsymv clAmdBlasSsymv_
#define clAmdBlasSsymvEx clAmdBlasSsymvEx_
#define clAmdBlasSsyr clAmdBlasSsyr_
#define clAmdBlasSsyr2 clAmdBlasSsyr2_
#define clAmdBlasSsyr2k clAmdBlasSsyr2k_
#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_
#define clAmdBlasSsyrk clAmdBlasSsyrk_
#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_
#define clAmdBlasStbmv clAmdBlasStbmv_
#define clAmdBlasStbsv clAmdBlasStbsv_
#define clAmdBlasStpmv clAmdBlasStpmv_
#define clAmdBlasStpsv clAmdBlasStpsv_
#define clAmdBlasStrmm clAmdBlasStrmm_
#define clAmdBlasStrmmEx clAmdBlasStrmmEx_
#define clAmdBlasStrmv clAmdBlasStrmv_
#define clAmdBlasStrsm clAmdBlasStrsm_
#define clAmdBlasStrsmEx clAmdBlasStrsmEx_
#define clAmdBlasStrsv clAmdBlasStrsv_
#define clAmdBlasTeardown clAmdBlasTeardown_
#define clAmdBlasZaxpy clAmdBlasZaxpy_
#define clAmdBlasZcopy clAmdBlasZcopy_
#define clAmdBlasZdotc clAmdBlasZdotc_
#define clAmdBlasZdotu clAmdBlasZdotu_
#define clAmdBlasZdrot clAmdBlasZdrot_
#define clAmdBlasZdscal clAmdBlasZdscal_
#define clAmdBlasZgbmv clAmdBlasZgbmv_
#define clAmdBlasZgemm clAmdBlasZgemm_
#define clAmdBlasZgemmEx clAmdBlasZgemmEx_
#define clAmdBlasZgemv clAmdBlasZgemv_
#define clAmdBlasZgemvEx clAmdBlasZgemvEx_
#define clAmdBlasZgerc clAmdBlasZgerc_
#define clAmdBlasZgeru clAmdBlasZgeru_
#define clAmdBlasZhbmv clAmdBlasZhbmv_
#define clAmdBlasZhemm clAmdBlasZhemm_
#define clAmdBlasZhemv clAmdBlasZhemv_
#define clAmdBlasZher clAmdBlasZher_
#define clAmdBlasZher2 clAmdBlasZher2_
#define clAmdBlasZher2k clAmdBlasZher2k_
#define clAmdBlasZherk clAmdBlasZherk_
#define clAmdBlasZhpmv clAmdBlasZhpmv_
#define clAmdBlasZhpr clAmdBlasZhpr_
#define clAmdBlasZhpr2 clAmdBlasZhpr2_
#define clAmdBlasZrotg clAmdBlasZrotg_
#define clAmdBlasZscal clAmdBlasZscal_
#define clAmdBlasZswap clAmdBlasZswap_
#define clAmdBlasZsymm clAmdBlasZsymm_
#define clAmdBlasZsyr2k clAmdBlasZsyr2k_
#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_
#define clAmdBlasZsyrk clAmdBlasZsyrk_
#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_
#define clAmdBlasZtbmv clAmdBlasZtbmv_
#define clAmdBlasZtbsv clAmdBlasZtbsv_
#define clAmdBlasZtpmv clAmdBlasZtpmv_
#define clAmdBlasZtpsv clAmdBlasZtpsv_
#define clAmdBlasZtrmm clAmdBlasZtrmm_
#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_
#define clAmdBlasZtrmv clAmdBlasZtrmv_
#define clAmdBlasZtrsm clAmdBlasZtrsm_
#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_
#define clAmdBlasZtrsv clAmdBlasZtrsv_
#define clAmdBlasiCamax clAmdBlasiCamax_
#define clAmdBlasiDamax clAmdBlasiDamax_
#define clAmdBlasiSamax clAmdBlasiSamax_
#define clAmdBlasiZamax clAmdBlasiZamax_
#include <clAmdBlas.h>
// generated by parser_clamdblas.py
#undef clAmdBlasAddScratchImage
//#define clAmdBlasAddScratchImage clAmdBlasAddScratchImage_pfn
#undef clAmdBlasCaxpy
//#define clAmdBlasCaxpy clAmdBlasCaxpy_pfn
#undef clAmdBlasCcopy
//#define clAmdBlasCcopy clAmdBlasCcopy_pfn
#undef clAmdBlasCdotc
//#define clAmdBlasCdotc clAmdBlasCdotc_pfn
#undef clAmdBlasCdotu
//#define clAmdBlasCdotu clAmdBlasCdotu_pfn
#undef clAmdBlasCgbmv
//#define clAmdBlasCgbmv clAmdBlasCgbmv_pfn
#undef clAmdBlasCgemm
//#define clAmdBlasCgemm clAmdBlasCgemm_pfn
#undef clAmdBlasCgemmEx
#define clAmdBlasCgemmEx clAmdBlasCgemmEx_pfn
#undef clAmdBlasCgemv
//#define clAmdBlasCgemv clAmdBlasCgemv_pfn
#undef clAmdBlasCgemvEx
//#define clAmdBlasCgemvEx clAmdBlasCgemvEx_pfn
#undef clAmdBlasCgerc
//#define clAmdBlasCgerc clAmdBlasCgerc_pfn
#undef clAmdBlasCgeru
//#define clAmdBlasCgeru clAmdBlasCgeru_pfn
#undef clAmdBlasChbmv
//#define clAmdBlasChbmv clAmdBlasChbmv_pfn
#undef clAmdBlasChemm
//#define clAmdBlasChemm clAmdBlasChemm_pfn
#undef clAmdBlasChemv
//#define clAmdBlasChemv clAmdBlasChemv_pfn
#undef clAmdBlasCher
//#define clAmdBlasCher clAmdBlasCher_pfn
#undef clAmdBlasCher2
//#define clAmdBlasCher2 clAmdBlasCher2_pfn
#undef clAmdBlasCher2k
//#define clAmdBlasCher2k clAmdBlasCher2k_pfn
#undef clAmdBlasCherk
//#define clAmdBlasCherk clAmdBlasCherk_pfn
#undef clAmdBlasChpmv
//#define clAmdBlasChpmv clAmdBlasChpmv_pfn
#undef clAmdBlasChpr
//#define clAmdBlasChpr clAmdBlasChpr_pfn
#undef clAmdBlasChpr2
//#define clAmdBlasChpr2 clAmdBlasChpr2_pfn
#undef clAmdBlasCrotg
//#define clAmdBlasCrotg clAmdBlasCrotg_pfn
#undef clAmdBlasCscal
//#define clAmdBlasCscal clAmdBlasCscal_pfn
#undef clAmdBlasCsrot
//#define clAmdBlasCsrot clAmdBlasCsrot_pfn
#undef clAmdBlasCsscal
//#define clAmdBlasCsscal clAmdBlasCsscal_pfn
#undef clAmdBlasCswap
//#define clAmdBlasCswap clAmdBlasCswap_pfn
#undef clAmdBlasCsymm
//#define clAmdBlasCsymm clAmdBlasCsymm_pfn
#undef clAmdBlasCsyr2k
//#define clAmdBlasCsyr2k clAmdBlasCsyr2k_pfn
#undef clAmdBlasCsyr2kEx
//#define clAmdBlasCsyr2kEx clAmdBlasCsyr2kEx_pfn
#undef clAmdBlasCsyrk
//#define clAmdBlasCsyrk clAmdBlasCsyrk_pfn
#undef clAmdBlasCsyrkEx
//#define clAmdBlasCsyrkEx clAmdBlasCsyrkEx_pfn
#undef clAmdBlasCtbmv
//#define clAmdBlasCtbmv clAmdBlasCtbmv_pfn
#undef clAmdBlasCtbsv
//#define clAmdBlasCtbsv clAmdBlasCtbsv_pfn
#undef clAmdBlasCtpmv
//#define clAmdBlasCtpmv clAmdBlasCtpmv_pfn
#undef clAmdBlasCtpsv
//#define clAmdBlasCtpsv clAmdBlasCtpsv_pfn
#undef clAmdBlasCtrmm
//#define clAmdBlasCtrmm clAmdBlasCtrmm_pfn
#undef clAmdBlasCtrmmEx
//#define clAmdBlasCtrmmEx clAmdBlasCtrmmEx_pfn
#undef clAmdBlasCtrmv
//#define clAmdBlasCtrmv clAmdBlasCtrmv_pfn
#undef clAmdBlasCtrsm
//#define clAmdBlasCtrsm clAmdBlasCtrsm_pfn
#undef clAmdBlasCtrsmEx
//#define clAmdBlasCtrsmEx clAmdBlasCtrsmEx_pfn
#undef clAmdBlasCtrsv
//#define clAmdBlasCtrsv clAmdBlasCtrsv_pfn
#undef clAmdBlasDasum
//#define clAmdBlasDasum clAmdBlasDasum_pfn
#undef clAmdBlasDaxpy
//#define clAmdBlasDaxpy clAmdBlasDaxpy_pfn
#undef clAmdBlasDcopy
//#define clAmdBlasDcopy clAmdBlasDcopy_pfn
#undef clAmdBlasDdot
//#define clAmdBlasDdot clAmdBlasDdot_pfn
#undef clAmdBlasDgbmv
//#define clAmdBlasDgbmv clAmdBlasDgbmv_pfn
#undef clAmdBlasDgemm
//#define clAmdBlasDgemm clAmdBlasDgemm_pfn
#undef clAmdBlasDgemmEx
#define clAmdBlasDgemmEx clAmdBlasDgemmEx_pfn
#undef clAmdBlasDgemv
//#define clAmdBlasDgemv clAmdBlasDgemv_pfn
#undef clAmdBlasDgemvEx
//#define clAmdBlasDgemvEx clAmdBlasDgemvEx_pfn
#undef clAmdBlasDger
//#define clAmdBlasDger clAmdBlasDger_pfn
#undef clAmdBlasDnrm2
//#define clAmdBlasDnrm2 clAmdBlasDnrm2_pfn
#undef clAmdBlasDrot
//#define clAmdBlasDrot clAmdBlasDrot_pfn
#undef clAmdBlasDrotg
//#define clAmdBlasDrotg clAmdBlasDrotg_pfn
#undef clAmdBlasDrotm
//#define clAmdBlasDrotm clAmdBlasDrotm_pfn
#undef clAmdBlasDrotmg
//#define clAmdBlasDrotmg clAmdBlasDrotmg_pfn
#undef clAmdBlasDsbmv
//#define clAmdBlasDsbmv clAmdBlasDsbmv_pfn
#undef clAmdBlasDscal
//#define clAmdBlasDscal clAmdBlasDscal_pfn
#undef clAmdBlasDspmv
//#define clAmdBlasDspmv clAmdBlasDspmv_pfn
#undef clAmdBlasDspr
//#define clAmdBlasDspr clAmdBlasDspr_pfn
#undef clAmdBlasDspr2
//#define clAmdBlasDspr2 clAmdBlasDspr2_pfn
#undef clAmdBlasDswap
//#define clAmdBlasDswap clAmdBlasDswap_pfn
#undef clAmdBlasDsymm
//#define clAmdBlasDsymm clAmdBlasDsymm_pfn
#undef clAmdBlasDsymv
//#define clAmdBlasDsymv clAmdBlasDsymv_pfn
#undef clAmdBlasDsymvEx
//#define clAmdBlasDsymvEx clAmdBlasDsymvEx_pfn
#undef clAmdBlasDsyr
//#define clAmdBlasDsyr clAmdBlasDsyr_pfn
#undef clAmdBlasDsyr2
//#define clAmdBlasDsyr2 clAmdBlasDsyr2_pfn
#undef clAmdBlasDsyr2k
//#define clAmdBlasDsyr2k clAmdBlasDsyr2k_pfn
#undef clAmdBlasDsyr2kEx
//#define clAmdBlasDsyr2kEx clAmdBlasDsyr2kEx_pfn
#undef clAmdBlasDsyrk
//#define clAmdBlasDsyrk clAmdBlasDsyrk_pfn
#undef clAmdBlasDsyrkEx
//#define clAmdBlasDsyrkEx clAmdBlasDsyrkEx_pfn
#undef clAmdBlasDtbmv
//#define clAmdBlasDtbmv clAmdBlasDtbmv_pfn
#undef clAmdBlasDtbsv
//#define clAmdBlasDtbsv clAmdBlasDtbsv_pfn
#undef clAmdBlasDtpmv
//#define clAmdBlasDtpmv clAmdBlasDtpmv_pfn
#undef clAmdBlasDtpsv
//#define clAmdBlasDtpsv clAmdBlasDtpsv_pfn
#undef clAmdBlasDtrmm
//#define clAmdBlasDtrmm clAmdBlasDtrmm_pfn
#undef clAmdBlasDtrmmEx
//#define clAmdBlasDtrmmEx clAmdBlasDtrmmEx_pfn
#undef clAmdBlasDtrmv
//#define clAmdBlasDtrmv clAmdBlasDtrmv_pfn
#undef clAmdBlasDtrsm
//#define clAmdBlasDtrsm clAmdBlasDtrsm_pfn
#undef clAmdBlasDtrsmEx
//#define clAmdBlasDtrsmEx clAmdBlasDtrsmEx_pfn
#undef clAmdBlasDtrsv
//#define clAmdBlasDtrsv clAmdBlasDtrsv_pfn
#undef clAmdBlasDzasum
//#define clAmdBlasDzasum clAmdBlasDzasum_pfn
#undef clAmdBlasDznrm2
//#define clAmdBlasDznrm2 clAmdBlasDznrm2_pfn
#undef clAmdBlasGetVersion
//#define clAmdBlasGetVersion clAmdBlasGetVersion_pfn
#undef clAmdBlasRemoveScratchImage
//#define clAmdBlasRemoveScratchImage clAmdBlasRemoveScratchImage_pfn
#undef clAmdBlasSasum
//#define clAmdBlasSasum clAmdBlasSasum_pfn
#undef clAmdBlasSaxpy
//#define clAmdBlasSaxpy clAmdBlasSaxpy_pfn
#undef clAmdBlasScasum
//#define clAmdBlasScasum clAmdBlasScasum_pfn
#undef clAmdBlasScnrm2
//#define clAmdBlasScnrm2 clAmdBlasScnrm2_pfn
#undef clAmdBlasScopy
//#define clAmdBlasScopy clAmdBlasScopy_pfn
#undef clAmdBlasSdot
//#define clAmdBlasSdot clAmdBlasSdot_pfn
#undef clAmdBlasSetup
#define clAmdBlasSetup clAmdBlasSetup_pfn
#undef clAmdBlasSgbmv
//#define clAmdBlasSgbmv clAmdBlasSgbmv_pfn
#undef clAmdBlasSgemm
//#define clAmdBlasSgemm clAmdBlasSgemm_pfn
#undef clAmdBlasSgemmEx
#define clAmdBlasSgemmEx clAmdBlasSgemmEx_pfn
#undef clAmdBlasSgemv
//#define clAmdBlasSgemv clAmdBlasSgemv_pfn
#undef clAmdBlasSgemvEx
//#define clAmdBlasSgemvEx clAmdBlasSgemvEx_pfn
#undef clAmdBlasSger
//#define clAmdBlasSger clAmdBlasSger_pfn
#undef clAmdBlasSnrm2
//#define clAmdBlasSnrm2 clAmdBlasSnrm2_pfn
#undef clAmdBlasSrot
//#define clAmdBlasSrot clAmdBlasSrot_pfn
#undef clAmdBlasSrotg
//#define clAmdBlasSrotg clAmdBlasSrotg_pfn
#undef clAmdBlasSrotm
//#define clAmdBlasSrotm clAmdBlasSrotm_pfn
#undef clAmdBlasSrotmg
//#define clAmdBlasSrotmg clAmdBlasSrotmg_pfn
#undef clAmdBlasSsbmv
//#define clAmdBlasSsbmv clAmdBlasSsbmv_pfn
#undef clAmdBlasSscal
//#define clAmdBlasSscal clAmdBlasSscal_pfn
#undef clAmdBlasSspmv
//#define clAmdBlasSspmv clAmdBlasSspmv_pfn
#undef clAmdBlasSspr
//#define clAmdBlasSspr clAmdBlasSspr_pfn
#undef clAmdBlasSspr2
//#define clAmdBlasSspr2 clAmdBlasSspr2_pfn
#undef clAmdBlasSswap
//#define clAmdBlasSswap clAmdBlasSswap_pfn
#undef clAmdBlasSsymm
//#define clAmdBlasSsymm clAmdBlasSsymm_pfn
#undef clAmdBlasSsymv
//#define clAmdBlasSsymv clAmdBlasSsymv_pfn
#undef clAmdBlasSsymvEx
//#define clAmdBlasSsymvEx clAmdBlasSsymvEx_pfn
#undef clAmdBlasSsyr
//#define clAmdBlasSsyr clAmdBlasSsyr_pfn
#undef clAmdBlasSsyr2
//#define clAmdBlasSsyr2 clAmdBlasSsyr2_pfn
#undef clAmdBlasSsyr2k
//#define clAmdBlasSsyr2k clAmdBlasSsyr2k_pfn
#undef clAmdBlasSsyr2kEx
//#define clAmdBlasSsyr2kEx clAmdBlasSsyr2kEx_pfn
#undef clAmdBlasSsyrk
//#define clAmdBlasSsyrk clAmdBlasSsyrk_pfn
#undef clAmdBlasSsyrkEx
//#define clAmdBlasSsyrkEx clAmdBlasSsyrkEx_pfn
#undef clAmdBlasStbmv
//#define clAmdBlasStbmv clAmdBlasStbmv_pfn
#undef clAmdBlasStbsv
//#define clAmdBlasStbsv clAmdBlasStbsv_pfn
#undef clAmdBlasStpmv
//#define clAmdBlasStpmv clAmdBlasStpmv_pfn
#undef clAmdBlasStpsv
//#define clAmdBlasStpsv clAmdBlasStpsv_pfn
#undef clAmdBlasStrmm
//#define clAmdBlasStrmm clAmdBlasStrmm_pfn
#undef clAmdBlasStrmmEx
//#define clAmdBlasStrmmEx clAmdBlasStrmmEx_pfn
#undef clAmdBlasStrmv
//#define clAmdBlasStrmv clAmdBlasStrmv_pfn
#undef clAmdBlasStrsm
//#define clAmdBlasStrsm clAmdBlasStrsm_pfn
#undef clAmdBlasStrsmEx
//#define clAmdBlasStrsmEx clAmdBlasStrsmEx_pfn
#undef clAmdBlasStrsv
//#define clAmdBlasStrsv clAmdBlasStrsv_pfn
#undef clAmdBlasTeardown
#define clAmdBlasTeardown clAmdBlasTeardown_pfn
#undef clAmdBlasZaxpy
//#define clAmdBlasZaxpy clAmdBlasZaxpy_pfn
#undef clAmdBlasZcopy
//#define clAmdBlasZcopy clAmdBlasZcopy_pfn
#undef clAmdBlasZdotc
//#define clAmdBlasZdotc clAmdBlasZdotc_pfn
#undef clAmdBlasZdotu
//#define clAmdBlasZdotu clAmdBlasZdotu_pfn
#undef clAmdBlasZdrot
//#define clAmdBlasZdrot clAmdBlasZdrot_pfn
#undef clAmdBlasZdscal
//#define clAmdBlasZdscal clAmdBlasZdscal_pfn
#undef clAmdBlasZgbmv
//#define clAmdBlasZgbmv clAmdBlasZgbmv_pfn
#undef clAmdBlasZgemm
//#define clAmdBlasZgemm clAmdBlasZgemm_pfn
#undef clAmdBlasZgemmEx
#define clAmdBlasZgemmEx clAmdBlasZgemmEx_pfn
#undef clAmdBlasZgemv
//#define clAmdBlasZgemv clAmdBlasZgemv_pfn
#undef clAmdBlasZgemvEx
//#define clAmdBlasZgemvEx clAmdBlasZgemvEx_pfn
#undef clAmdBlasZgerc
//#define clAmdBlasZgerc clAmdBlasZgerc_pfn
#undef clAmdBlasZgeru
//#define clAmdBlasZgeru clAmdBlasZgeru_pfn
#undef clAmdBlasZhbmv
//#define clAmdBlasZhbmv clAmdBlasZhbmv_pfn
#undef clAmdBlasZhemm
//#define clAmdBlasZhemm clAmdBlasZhemm_pfn
#undef clAmdBlasZhemv
//#define clAmdBlasZhemv clAmdBlasZhemv_pfn
#undef clAmdBlasZher
//#define clAmdBlasZher clAmdBlasZher_pfn
#undef clAmdBlasZher2
//#define clAmdBlasZher2 clAmdBlasZher2_pfn
#undef clAmdBlasZher2k
//#define clAmdBlasZher2k clAmdBlasZher2k_pfn
#undef clAmdBlasZherk
//#define clAmdBlasZherk clAmdBlasZherk_pfn
#undef clAmdBlasZhpmv
//#define clAmdBlasZhpmv clAmdBlasZhpmv_pfn
#undef clAmdBlasZhpr
//#define clAmdBlasZhpr clAmdBlasZhpr_pfn
#undef clAmdBlasZhpr2
//#define clAmdBlasZhpr2 clAmdBlasZhpr2_pfn
#undef clAmdBlasZrotg
//#define clAmdBlasZrotg clAmdBlasZrotg_pfn
#undef clAmdBlasZscal
//#define clAmdBlasZscal clAmdBlasZscal_pfn
#undef clAmdBlasZswap
//#define clAmdBlasZswap clAmdBlasZswap_pfn
#undef clAmdBlasZsymm
//#define clAmdBlasZsymm clAmdBlasZsymm_pfn
#undef clAmdBlasZsyr2k
//#define clAmdBlasZsyr2k clAmdBlasZsyr2k_pfn
#undef clAmdBlasZsyr2kEx
//#define clAmdBlasZsyr2kEx clAmdBlasZsyr2kEx_pfn
#undef clAmdBlasZsyrk
//#define clAmdBlasZsyrk clAmdBlasZsyrk_pfn
#undef clAmdBlasZsyrkEx
//#define clAmdBlasZsyrkEx clAmdBlasZsyrkEx_pfn
#undef clAmdBlasZtbmv
//#define clAmdBlasZtbmv clAmdBlasZtbmv_pfn
#undef clAmdBlasZtbsv
//#define clAmdBlasZtbsv clAmdBlasZtbsv_pfn
#undef clAmdBlasZtpmv
//#define clAmdBlasZtpmv clAmdBlasZtpmv_pfn
#undef clAmdBlasZtpsv
//#define clAmdBlasZtpsv clAmdBlasZtpsv_pfn
#undef clAmdBlasZtrmm
//#define clAmdBlasZtrmm clAmdBlasZtrmm_pfn
#undef clAmdBlasZtrmmEx
//#define clAmdBlasZtrmmEx clAmdBlasZtrmmEx_pfn
#undef clAmdBlasZtrmv
//#define clAmdBlasZtrmv clAmdBlasZtrmv_pfn
#undef clAmdBlasZtrsm
//#define clAmdBlasZtrsm clAmdBlasZtrsm_pfn
#undef clAmdBlasZtrsmEx
//#define clAmdBlasZtrsmEx clAmdBlasZtrsmEx_pfn
#undef clAmdBlasZtrsv
//#define clAmdBlasZtrsv clAmdBlasZtrsv_pfn
#undef clAmdBlasiCamax
//#define clAmdBlasiCamax clAmdBlasiCamax_pfn
#undef clAmdBlasiDamax
//#define clAmdBlasiDamax clAmdBlasiDamax_pfn
#undef clAmdBlasiSamax
//#define clAmdBlasiSamax clAmdBlasiSamax_pfn
#undef clAmdBlasiZamax
//#define clAmdBlasiZamax clAmdBlasiZamax_pfn
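// Note on the pattern above: every clAmdBlas API name is first #undef'ed, and
// only the entry points this build dispatches dynamically (clAmdBlasSetup,
// clAmdBlasTeardown, and the *gemmEx family) are re-#defined to a *_pfn
// function pointer declared below; the pointers are presumably resolved from
// the clAmdBlas library at runtime by the surrounding CL runtime layer. The
// commented-out pairs keep the full API surface listed for reference without
// pulling in unused symbols. A minimal usage sketch, assuming the runtime
// layer has loaded the pointers (hypothetical call site, not from this file):
//
//   clAmdBlasSetup();        // expands via the matching #define to clAmdBlasSetup_pfn()
//   // ... enqueue e.g. clAmdBlasSgemmEx(...) on a cl_command_queue ...
//   clAmdBlasTeardown();     // expands to clAmdBlasTeardown_pfn()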
// generated by parser_clamdblas.py
//extern CL_RUNTIME_EXPORT cl_ulong (*clAmdBlasAddScratchImage)(cl_context context, size_t width, size_t height, clAmdBlasStatus* status);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCaxpy)(size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, FloatComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, FloatComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, float alpha, const cl_mem A, size_t offa, size_t lda, float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasChpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCscal)(size_t N, cl_float2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t lda, FloatComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, FloatComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, FloatComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasCtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDaxpy)(size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDger)(clAmdBlasOrder order, size_t M, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotg)(cl_mem DA, size_t offDA, cl_mem DB, size_t offDB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDrotmg)(cl_mem DD1, size_t offDD1, cl_mem DD2, size_t offDD2, cl_mem DX1, size_t offDX1, const cl_mem DY1, size_t offDY1, cl_mem DPARAM, size_t offDparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_double beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t lda, cl_double beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_double beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_double alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDzasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasDznrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasRemoveScratchImage)(cl_ulong imageID);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSaxpy)(size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScasum)(size_t N, cl_mem asum, size_t offAsum, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasScopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSdot)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSetup)();
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSger)(clAmdBlasOrder order, size_t M, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSnrm2)(size_t N, cl_mem NRM2, size_t offNRM2, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_float C, cl_float S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotg)(cl_mem SA, size_t offSA, cl_mem SB, size_t offSB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotm)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, const cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSrotmg)(cl_mem SD1, size_t offSD1, cl_mem SD2, size_t offSD2, cl_mem SX1, size_t offSX1, const cl_mem SY1, size_t offSY1, cl_mem SPARAM, size_t offSparam, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSscal)(size_t N, cl_float alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_float beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSspr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_float beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsymvEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, cl_float beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_float alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t lda, cl_float beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasSsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_float beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, cl_float alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasStrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
extern CL_RUNTIME_EXPORT void (*clAmdBlasTeardown)();
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZaxpy)(size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZcopy)(size_t N, const cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotc)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdotu)(size_t N, cl_mem dotProduct, size_t offDP, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdrot)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_double C, cl_double S, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZdscal)(size_t N, cl_double alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgbmv)(clAmdBlasOrder order, clAmdBlasTranspose trans, size_t M, size_t N, size_t KL, size_t KU, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemm)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemmEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, clAmdBlasTranspose transB, size_t M, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemv)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgemvEx)(clAmdBlasOrder order, clAmdBlasTranspose transA, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem x, size_t offx, int incx, DoubleComplex beta, cl_mem y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgerc)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZgeru)(clAmdBlasOrder order, size_t M, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, size_t K, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhemv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem X, size_t offx, int incx, DoubleComplex beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem A, size_t offa, size_t lda, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZher2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZherk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, double alpha, const cl_mem A, size_t offa, size_t lda, double beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem AP, size_t offa, const cl_mem X, size_t offx, int incx, cl_double2 beta, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double alpha, const cl_mem X, size_t offx, int incx, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZhpr2)(clAmdBlasOrder order, clAmdBlasUplo uplo, size_t N, cl_double2 alpha, const cl_mem X, size_t offx, int incx, const cl_mem Y, size_t offy, int incy, cl_mem AP, size_t offa, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZrotg)(cl_mem CA, size_t offCA, cl_mem CB, size_t offCB, cl_mem C, size_t offC, cl_mem S, size_t offS, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZscal)(size_t N, cl_double2 alpha, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZswap)(size_t N, cl_mem X, size_t offx, int incx, cl_mem Y, size_t offy, int incy, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsymm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, size_t M, size_t N, cl_double2 alpha, const cl_mem A, size_t offa, size_t lda, const cl_mem B, size_t offb, size_t ldb, cl_double2 beta, cl_mem C, size_t offc, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2k)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, const cl_mem B, size_t ldb, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyr2kEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transAB, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, const cl_mem B, size_t offB, size_t ldb, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrk)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t lda, DoubleComplex beta, cl_mem C, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZsyrkEx)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose transA, size_t N, size_t K, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, DoubleComplex beta, cl_mem C, size_t offC, size_t ldc, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtbsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, size_t K, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem AP, size_t offa, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtpsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrmv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsm)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t lda, cl_mem B, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsmEx)(clAmdBlasOrder order, clAmdBlasSide side, clAmdBlasUplo uplo, clAmdBlasTranspose transA, clAmdBlasDiag diag, size_t M, size_t N, DoubleComplex alpha, const cl_mem A, size_t offA, size_t lda, cl_mem B, size_t offB, size_t ldb, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasZtrsv)(clAmdBlasOrder order, clAmdBlasUplo uplo, clAmdBlasTranspose trans, clAmdBlasDiag diag, size_t N, const cl_mem A, size_t offa, size_t lda, cl_mem X, size_t offx, int incx, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiCamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiDamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiSamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
//extern CL_RUNTIME_EXPORT clAmdBlasStatus (*clAmdBlasiZamax)(size_t N, cl_mem iMax, size_t offiMax, const cl_mem X, size_t offx, int incx, cl_mem scratchBuff, cl_uint numCommandQueues, cl_command_queue* commandQueues, cl_uint numEventsInWaitList, const cl_event* eventWaitList, cl_event* events);
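// The declarations left commented out above are the clAmdBlas entry points
// this runtime does not dispatch; the generator appears to emit them disabled
// rather than omit them, so the full library surface stays visible here.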


@@ -0,0 +1,142 @@
//
// AUTOGENERATED, DO NOT EDIT
//
#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
#error "Invalid usage"
#endif
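// How this header works (summary, inferred from the generated code below):
//  1. Each clAmdFft* name is first #defined to a suffixed alias (clAmdFftX_),
//     so the prototypes in <clAmdFft.h> are renamed on inclusion and cannot
//     collide with the function pointers declared further down.
//  2. Every macro is then #undef'd, and the names the runtime actually
//     dispatches are redirected to *_pfn function pointers that get resolved
//     when the clAmdFft library is loaded at run time; names left commented
//     out are not bound.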
// generated by parser_clamdfft.py
#define clAmdFftBakePlan clAmdFftBakePlan_
#define clAmdFftCopyPlan clAmdFftCopyPlan_
#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_
#define clAmdFftDestroyPlan clAmdFftDestroyPlan_
#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_
#define clAmdFftGetLayout clAmdFftGetLayout_
#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_
#define clAmdFftGetPlanContext clAmdFftGetPlanContext_
#define clAmdFftGetPlanDim clAmdFftGetPlanDim_
#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_
#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_
#define clAmdFftGetPlanLength clAmdFftGetPlanLength_
#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_
#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_
#define clAmdFftGetPlanScale clAmdFftGetPlanScale_
#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_
#define clAmdFftGetResultLocation clAmdFftGetResultLocation_
#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_
#define clAmdFftGetVersion clAmdFftGetVersion_
#define clAmdFftSetLayout clAmdFftSetLayout_
#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_
#define clAmdFftSetPlanDim clAmdFftSetPlanDim_
#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_
#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_
#define clAmdFftSetPlanLength clAmdFftSetPlanLength_
#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_
#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_
#define clAmdFftSetPlanScale clAmdFftSetPlanScale_
#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_
#define clAmdFftSetResultLocation clAmdFftSetResultLocation_
#define clAmdFftSetup clAmdFftSetup_
#define clAmdFftTeardown clAmdFftTeardown_
#include <clAmdFft.h>
// generated by parser_clamdfft.py
#undef clAmdFftBakePlan
#define clAmdFftBakePlan clAmdFftBakePlan_pfn
#undef clAmdFftCopyPlan
//#define clAmdFftCopyPlan clAmdFftCopyPlan_pfn
#undef clAmdFftCreateDefaultPlan
#define clAmdFftCreateDefaultPlan clAmdFftCreateDefaultPlan_pfn
#undef clAmdFftDestroyPlan
#define clAmdFftDestroyPlan clAmdFftDestroyPlan_pfn
#undef clAmdFftEnqueueTransform
#define clAmdFftEnqueueTransform clAmdFftEnqueueTransform_pfn
#undef clAmdFftGetLayout
//#define clAmdFftGetLayout clAmdFftGetLayout_pfn
#undef clAmdFftGetPlanBatchSize
//#define clAmdFftGetPlanBatchSize clAmdFftGetPlanBatchSize_pfn
#undef clAmdFftGetPlanContext
//#define clAmdFftGetPlanContext clAmdFftGetPlanContext_pfn
#undef clAmdFftGetPlanDim
//#define clAmdFftGetPlanDim clAmdFftGetPlanDim_pfn
#undef clAmdFftGetPlanDistance
//#define clAmdFftGetPlanDistance clAmdFftGetPlanDistance_pfn
#undef clAmdFftGetPlanInStride
//#define clAmdFftGetPlanInStride clAmdFftGetPlanInStride_pfn
#undef clAmdFftGetPlanLength
//#define clAmdFftGetPlanLength clAmdFftGetPlanLength_pfn
#undef clAmdFftGetPlanOutStride
//#define clAmdFftGetPlanOutStride clAmdFftGetPlanOutStride_pfn
#undef clAmdFftGetPlanPrecision
//#define clAmdFftGetPlanPrecision clAmdFftGetPlanPrecision_pfn
#undef clAmdFftGetPlanScale
//#define clAmdFftGetPlanScale clAmdFftGetPlanScale_pfn
#undef clAmdFftGetPlanTransposeResult
//#define clAmdFftGetPlanTransposeResult clAmdFftGetPlanTransposeResult_pfn
#undef clAmdFftGetResultLocation
//#define clAmdFftGetResultLocation clAmdFftGetResultLocation_pfn
#undef clAmdFftGetTmpBufSize
#define clAmdFftGetTmpBufSize clAmdFftGetTmpBufSize_pfn
#undef clAmdFftGetVersion
#define clAmdFftGetVersion clAmdFftGetVersion_pfn
#undef clAmdFftSetLayout
#define clAmdFftSetLayout clAmdFftSetLayout_pfn
#undef clAmdFftSetPlanBatchSize
#define clAmdFftSetPlanBatchSize clAmdFftSetPlanBatchSize_pfn
#undef clAmdFftSetPlanDim
//#define clAmdFftSetPlanDim clAmdFftSetPlanDim_pfn
#undef clAmdFftSetPlanDistance
#define clAmdFftSetPlanDistance clAmdFftSetPlanDistance_pfn
#undef clAmdFftSetPlanInStride
#define clAmdFftSetPlanInStride clAmdFftSetPlanInStride_pfn
#undef clAmdFftSetPlanLength
//#define clAmdFftSetPlanLength clAmdFftSetPlanLength_pfn
#undef clAmdFftSetPlanOutStride
#define clAmdFftSetPlanOutStride clAmdFftSetPlanOutStride_pfn
#undef clAmdFftSetPlanPrecision
#define clAmdFftSetPlanPrecision clAmdFftSetPlanPrecision_pfn
#undef clAmdFftSetPlanScale
#define clAmdFftSetPlanScale clAmdFftSetPlanScale_pfn
#undef clAmdFftSetPlanTransposeResult
//#define clAmdFftSetPlanTransposeResult clAmdFftSetPlanTransposeResult_pfn
#undef clAmdFftSetResultLocation
#define clAmdFftSetResultLocation clAmdFftSetResultLocation_pfn
#undef clAmdFftSetup
#define clAmdFftSetup clAmdFftSetup_pfn
#undef clAmdFftTeardown
#define clAmdFftTeardown clAmdFftTeardown_pfn
// generated by parser_clamdfft.py
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftBakePlan)(clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, void (CL_CALLBACK* pfn_notify) (clAmdFftPlanHandle plHandle, void* user_data), void* user_data);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCopyPlan)(clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftCreateDefaultPlan)(clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, const size_t* clLengths);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftDestroyPlan)(clAmdFftPlanHandle* plHandle);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftEnqueueTransform)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_uint numQueuesAndEvents, cl_command_queue* commQueues, cl_uint numWaitEvents, const cl_event* waitEvents, cl_event* outEvents, cl_mem* inputBuffers, cl_mem* outputBuffers, cl_mem tmpBuffer);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetLayout)(const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanBatchSize)(const clAmdFftPlanHandle plHandle, size_t* batchSize);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanContext)(const clAmdFftPlanHandle plHandle, cl_context* context);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDim)(const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanDistance)(const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanInStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanLength)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanOutStride)(const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanPrecision)(const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanScale)(const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetPlanTransposeResult)(const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed* transposed);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetResultLocation)(const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetTmpBufSize)(const clAmdFftPlanHandle plHandle, size_t* buffersize);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftGetVersion)(cl_uint* major, cl_uint* minor, cl_uint* patch);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetLayout)(clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanBatchSize)(clAmdFftPlanHandle plHandle, size_t batchSize);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDim)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanDistance)(clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanInStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanLength)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanOutStride)(clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanPrecision)(clAmdFftPlanHandle plHandle, clAmdFftPrecision precision);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanScale)(clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale);
//extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetPlanTransposeResult)(clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetResultLocation)(clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftSetup)(const clAmdFftSetupData* setupData);
extern CL_RUNTIME_EXPORT clAmdFftStatus (*clAmdFftTeardown)();
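/*
 * Illustration (not part of the generated header): a minimal sketch of how a
 * *_pfn pointer such as clAmdFftGetVersion_pfn can be resolved at run time
 * with dlopen/dlsym. OpenCV's real loader lives in the generated runtime
 * .cpp files and is more involved; the library name below is an assumption.
 */
#include <dlfcn.h>
#include <stdio.h>

typedef int (*clAmdFftGetVersion_fn_t)(unsigned int*, unsigned int*, unsigned int*);

int main(void)
{
    /* Library name is an assumption; it varies across clAmdFft releases. */
    void* lib = dlopen("libclAmdFft.Runtime.so", RTLD_LAZY);
    if (!lib)
    {
        fprintf(stderr, "clAmdFft runtime not found: %s\n", dlerror());
        return 1; /* no library: the whole clAmdFft path stays disabled */
    }
    clAmdFftGetVersion_fn_t getVersion =
        (clAmdFftGetVersion_fn_t)dlsym(lib, "clAmdFftGetVersion");
    if (getVersion)
    {
        unsigned int major = 0, minor = 0, patch = 0;
        getVersion(&major, &minor, &patch);
        printf("clAmdFft %u.%u.%u\n", major, minor, patch);
    }
    dlclose(lib);
    return 0;
}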


@@ -0,0 +1,370 @@
//
// AUTOGENERATED, DO NOT EDIT
//
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP
#error "Invalid usage"
#endif
// generated by parser_cl.py
#define clBuildProgram clBuildProgram_
#define clCompileProgram clCompileProgram_
#define clCreateBuffer clCreateBuffer_
#define clCreateCommandQueue clCreateCommandQueue_
#define clCreateContext clCreateContext_
#define clCreateContextFromType clCreateContextFromType_
#define clCreateImage clCreateImage_
#define clCreateImage2D clCreateImage2D_
#define clCreateImage3D clCreateImage3D_
#define clCreateKernel clCreateKernel_
#define clCreateKernelsInProgram clCreateKernelsInProgram_
#define clCreateProgramWithBinary clCreateProgramWithBinary_
#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_
#define clCreateProgramWithSource clCreateProgramWithSource_
#define clCreateSampler clCreateSampler_
#define clCreateSubBuffer clCreateSubBuffer_
#define clCreateSubDevices clCreateSubDevices_
#define clCreateUserEvent clCreateUserEvent_
#define clEnqueueBarrier clEnqueueBarrier_
#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_
#define clEnqueueCopyBuffer clEnqueueCopyBuffer_
#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_
#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_
#define clEnqueueCopyImage clEnqueueCopyImage_
#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_
#define clEnqueueFillBuffer clEnqueueFillBuffer_
#define clEnqueueFillImage clEnqueueFillImage_
#define clEnqueueMapBuffer clEnqueueMapBuffer_
#define clEnqueueMapImage clEnqueueMapImage_
#define clEnqueueMarker clEnqueueMarker_
#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_
#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_
#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_
#define clEnqueueNativeKernel clEnqueueNativeKernel_
#define clEnqueueReadBuffer clEnqueueReadBuffer_
#define clEnqueueReadBufferRect clEnqueueReadBufferRect_
#define clEnqueueReadImage clEnqueueReadImage_
#define clEnqueueTask clEnqueueTask_
#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_
#define clEnqueueWaitForEvents clEnqueueWaitForEvents_
#define clEnqueueWriteBuffer clEnqueueWriteBuffer_
#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_
#define clEnqueueWriteImage clEnqueueWriteImage_
#define clFinish clFinish_
#define clFlush clFlush_
#define clGetCommandQueueInfo clGetCommandQueueInfo_
#define clGetContextInfo clGetContextInfo_
#define clGetDeviceIDs clGetDeviceIDs_
#define clGetDeviceInfo clGetDeviceInfo_
#define clGetEventInfo clGetEventInfo_
#define clGetEventProfilingInfo clGetEventProfilingInfo_
#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_
#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_
#define clGetImageInfo clGetImageInfo_
#define clGetKernelArgInfo clGetKernelArgInfo_
#define clGetKernelInfo clGetKernelInfo_
#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_
#define clGetMemObjectInfo clGetMemObjectInfo_
#define clGetPlatformIDs clGetPlatformIDs_
#define clGetPlatformInfo clGetPlatformInfo_
#define clGetProgramBuildInfo clGetProgramBuildInfo_
#define clGetProgramInfo clGetProgramInfo_
#define clGetSamplerInfo clGetSamplerInfo_
#define clGetSupportedImageFormats clGetSupportedImageFormats_
#define clLinkProgram clLinkProgram_
#define clReleaseCommandQueue clReleaseCommandQueue_
#define clReleaseContext clReleaseContext_
#define clReleaseDevice clReleaseDevice_
#define clReleaseEvent clReleaseEvent_
#define clReleaseKernel clReleaseKernel_
#define clReleaseMemObject clReleaseMemObject_
#define clReleaseProgram clReleaseProgram_
#define clReleaseSampler clReleaseSampler_
#define clRetainCommandQueue clRetainCommandQueue_
#define clRetainContext clRetainContext_
#define clRetainDevice clRetainDevice_
#define clRetainEvent clRetainEvent_
#define clRetainKernel clRetainKernel_
#define clRetainMemObject clRetainMemObject_
#define clRetainProgram clRetainProgram_
#define clRetainSampler clRetainSampler_
#define clSetEventCallback clSetEventCallback_
#define clSetKernelArg clSetKernelArg_
#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_
#define clSetUserEventStatus clSetUserEventStatus_
#define clUnloadCompiler clUnloadCompiler_
#define clUnloadPlatformCompiler clUnloadPlatformCompiler_
#define clWaitForEvents clWaitForEvents_
#if defined __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
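// The #define block above runs before <CL/cl.h> is included, so the standard
// OpenCL prototypes are renamed to clX_ on inclusion; the #undef/#define
// pairs below then rebind every name to a clX_pfn function pointer, which is
// what lets the OpenCL library be loaded dynamically instead of being linked
// at build time.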
// generated by parser_cl.py
#undef clBuildProgram
#define clBuildProgram clBuildProgram_pfn
#undef clCompileProgram
#define clCompileProgram clCompileProgram_pfn
#undef clCreateBuffer
#define clCreateBuffer clCreateBuffer_pfn
#undef clCreateCommandQueue
#define clCreateCommandQueue clCreateCommandQueue_pfn
#undef clCreateContext
#define clCreateContext clCreateContext_pfn
#undef clCreateContextFromType
#define clCreateContextFromType clCreateContextFromType_pfn
#undef clCreateImage
#define clCreateImage clCreateImage_pfn
#undef clCreateImage2D
#define clCreateImage2D clCreateImage2D_pfn
#undef clCreateImage3D
#define clCreateImage3D clCreateImage3D_pfn
#undef clCreateKernel
#define clCreateKernel clCreateKernel_pfn
#undef clCreateKernelsInProgram
#define clCreateKernelsInProgram clCreateKernelsInProgram_pfn
#undef clCreateProgramWithBinary
#define clCreateProgramWithBinary clCreateProgramWithBinary_pfn
#undef clCreateProgramWithBuiltInKernels
#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_pfn
#undef clCreateProgramWithSource
#define clCreateProgramWithSource clCreateProgramWithSource_pfn
#undef clCreateSampler
#define clCreateSampler clCreateSampler_pfn
#undef clCreateSubBuffer
#define clCreateSubBuffer clCreateSubBuffer_pfn
#undef clCreateSubDevices
#define clCreateSubDevices clCreateSubDevices_pfn
#undef clCreateUserEvent
#define clCreateUserEvent clCreateUserEvent_pfn
#undef clEnqueueBarrier
#define clEnqueueBarrier clEnqueueBarrier_pfn
#undef clEnqueueBarrierWithWaitList
#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_pfn
#undef clEnqueueCopyBuffer
#define clEnqueueCopyBuffer clEnqueueCopyBuffer_pfn
#undef clEnqueueCopyBufferRect
#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_pfn
#undef clEnqueueCopyBufferToImage
#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_pfn
#undef clEnqueueCopyImage
#define clEnqueueCopyImage clEnqueueCopyImage_pfn
#undef clEnqueueCopyImageToBuffer
#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_pfn
#undef clEnqueueFillBuffer
#define clEnqueueFillBuffer clEnqueueFillBuffer_pfn
#undef clEnqueueFillImage
#define clEnqueueFillImage clEnqueueFillImage_pfn
#undef clEnqueueMapBuffer
#define clEnqueueMapBuffer clEnqueueMapBuffer_pfn
#undef clEnqueueMapImage
#define clEnqueueMapImage clEnqueueMapImage_pfn
#undef clEnqueueMarker
#define clEnqueueMarker clEnqueueMarker_pfn
#undef clEnqueueMarkerWithWaitList
#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_pfn
#undef clEnqueueMigrateMemObjects
#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_pfn
#undef clEnqueueNDRangeKernel
#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_pfn
#undef clEnqueueNativeKernel
#define clEnqueueNativeKernel clEnqueueNativeKernel_pfn
#undef clEnqueueReadBuffer
#define clEnqueueReadBuffer clEnqueueReadBuffer_pfn
#undef clEnqueueReadBufferRect
#define clEnqueueReadBufferRect clEnqueueReadBufferRect_pfn
#undef clEnqueueReadImage
#define clEnqueueReadImage clEnqueueReadImage_pfn
#undef clEnqueueTask
#define clEnqueueTask clEnqueueTask_pfn
#undef clEnqueueUnmapMemObject
#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_pfn
#undef clEnqueueWaitForEvents
#define clEnqueueWaitForEvents clEnqueueWaitForEvents_pfn
#undef clEnqueueWriteBuffer
#define clEnqueueWriteBuffer clEnqueueWriteBuffer_pfn
#undef clEnqueueWriteBufferRect
#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_pfn
#undef clEnqueueWriteImage
#define clEnqueueWriteImage clEnqueueWriteImage_pfn
#undef clFinish
#define clFinish clFinish_pfn
#undef clFlush
#define clFlush clFlush_pfn
#undef clGetCommandQueueInfo
#define clGetCommandQueueInfo clGetCommandQueueInfo_pfn
#undef clGetContextInfo
#define clGetContextInfo clGetContextInfo_pfn
#undef clGetDeviceIDs
#define clGetDeviceIDs clGetDeviceIDs_pfn
#undef clGetDeviceInfo
#define clGetDeviceInfo clGetDeviceInfo_pfn
#undef clGetEventInfo
#define clGetEventInfo clGetEventInfo_pfn
#undef clGetEventProfilingInfo
#define clGetEventProfilingInfo clGetEventProfilingInfo_pfn
#undef clGetExtensionFunctionAddress
#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_pfn
#undef clGetExtensionFunctionAddressForPlatform
#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_pfn
#undef clGetImageInfo
#define clGetImageInfo clGetImageInfo_pfn
#undef clGetKernelArgInfo
#define clGetKernelArgInfo clGetKernelArgInfo_pfn
#undef clGetKernelInfo
#define clGetKernelInfo clGetKernelInfo_pfn
#undef clGetKernelWorkGroupInfo
#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_pfn
#undef clGetMemObjectInfo
#define clGetMemObjectInfo clGetMemObjectInfo_pfn
#undef clGetPlatformIDs
#define clGetPlatformIDs clGetPlatformIDs_pfn
#undef clGetPlatformInfo
#define clGetPlatformInfo clGetPlatformInfo_pfn
#undef clGetProgramBuildInfo
#define clGetProgramBuildInfo clGetProgramBuildInfo_pfn
#undef clGetProgramInfo
#define clGetProgramInfo clGetProgramInfo_pfn
#undef clGetSamplerInfo
#define clGetSamplerInfo clGetSamplerInfo_pfn
#undef clGetSupportedImageFormats
#define clGetSupportedImageFormats clGetSupportedImageFormats_pfn
#undef clLinkProgram
#define clLinkProgram clLinkProgram_pfn
#undef clReleaseCommandQueue
#define clReleaseCommandQueue clReleaseCommandQueue_pfn
#undef clReleaseContext
#define clReleaseContext clReleaseContext_pfn
#undef clReleaseDevice
#define clReleaseDevice clReleaseDevice_pfn
#undef clReleaseEvent
#define clReleaseEvent clReleaseEvent_pfn
#undef clReleaseKernel
#define clReleaseKernel clReleaseKernel_pfn
#undef clReleaseMemObject
#define clReleaseMemObject clReleaseMemObject_pfn
#undef clReleaseProgram
#define clReleaseProgram clReleaseProgram_pfn
#undef clReleaseSampler
#define clReleaseSampler clReleaseSampler_pfn
#undef clRetainCommandQueue
#define clRetainCommandQueue clRetainCommandQueue_pfn
#undef clRetainContext
#define clRetainContext clRetainContext_pfn
#undef clRetainDevice
#define clRetainDevice clRetainDevice_pfn
#undef clRetainEvent
#define clRetainEvent clRetainEvent_pfn
#undef clRetainKernel
#define clRetainKernel clRetainKernel_pfn
#undef clRetainMemObject
#define clRetainMemObject clRetainMemObject_pfn
#undef clRetainProgram
#define clRetainProgram clRetainProgram_pfn
#undef clRetainSampler
#define clRetainSampler clRetainSampler_pfn
#undef clSetEventCallback
#define clSetEventCallback clSetEventCallback_pfn
#undef clSetKernelArg
#define clSetKernelArg clSetKernelArg_pfn
#undef clSetMemObjectDestructorCallback
#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_pfn
#undef clSetUserEventStatus
#define clSetUserEventStatus clSetUserEventStatus_pfn
#undef clUnloadCompiler
#define clUnloadCompiler clUnloadCompiler_pfn
#undef clUnloadPlatformCompiler
#define clUnloadPlatformCompiler clUnloadPlatformCompiler_pfn
#undef clWaitForEvents
#define clWaitForEvents clWaitForEvents_pfn
// generated by parser_cl.py
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clBuildProgram)(cl_program, cl_uint, const cl_device_id*, const char*, void (CL_CALLBACK*) (cl_program, void*), void*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCompileProgram)(cl_program, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, const char**, void (CL_CALLBACK*) (cl_program, void*), void*);
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateBuffer)(cl_context, cl_mem_flags, size_t, void*, cl_int*);
extern CL_RUNTIME_EXPORT cl_command_queue (CL_API_CALL*clCreateCommandQueue)(cl_context, cl_device_id, cl_command_queue_properties, cl_int*);
extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContext)(const cl_context_properties*, cl_uint, const cl_device_id*, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*);
extern CL_RUNTIME_EXPORT cl_context (CL_API_CALL*clCreateContextFromType)(const cl_context_properties*, cl_device_type, void (CL_CALLBACK*) (const char*, const void*, size_t, void*), void*, cl_int*);
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage)(cl_context, cl_mem_flags, const cl_image_format*, const cl_image_desc*, void*, cl_int*);
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage2D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, void*, cl_int*);
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateImage3D)(cl_context, cl_mem_flags, const cl_image_format*, size_t, size_t, size_t, size_t, size_t, void*, cl_int*);
extern CL_RUNTIME_EXPORT cl_kernel (CL_API_CALL*clCreateKernel)(cl_program, const char*, cl_int*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateKernelsInProgram)(cl_program, cl_uint, cl_kernel*, cl_uint*);
extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBinary)(cl_context, cl_uint, const cl_device_id*, const size_t*, const unsigned char**, cl_int*, cl_int*);
extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithBuiltInKernels)(cl_context, cl_uint, const cl_device_id*, const char*, cl_int*);
extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clCreateProgramWithSource)(cl_context, cl_uint, const char**, const size_t*, cl_int*);
extern CL_RUNTIME_EXPORT cl_sampler (CL_API_CALL*clCreateSampler)(cl_context, cl_bool, cl_addressing_mode, cl_filter_mode, cl_int*);
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateSubBuffer)(cl_mem, cl_mem_flags, cl_buffer_create_type, const void*, cl_int*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clCreateSubDevices)(cl_device_id, const cl_device_partition_property*, cl_uint, cl_device_id*, cl_uint*);
extern CL_RUNTIME_EXPORT cl_event (CL_API_CALL*clCreateUserEvent)(cl_context, cl_int*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrier)(cl_command_queue);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueBarrierWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBuffer)(cl_command_queue, cl_mem, cl_mem, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferRect)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyBufferToImage)(cl_command_queue, cl_mem, cl_mem, size_t, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImage)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueCopyImageToBuffer)(cl_command_queue, cl_mem, cl_mem, const size_t*, const size_t*, size_t, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillBuffer)(cl_command_queue, cl_mem, const void*, size_t, size_t, size_t, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueFillImage)(cl_command_queue, cl_mem, const void*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapBuffer)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, size_t, size_t, cl_uint, const cl_event*, cl_event*, cl_int*);
extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clEnqueueMapImage)(cl_command_queue, cl_mem, cl_bool, cl_map_flags, const size_t*, const size_t*, size_t*, size_t*, cl_uint, const cl_event*, cl_event*, cl_int*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarker)(cl_command_queue, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMarkerWithWaitList)(cl_command_queue, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueMigrateMemObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_mem_migration_flags, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNDRangeKernel)(cl_command_queue, cl_kernel, cl_uint, const size_t*, const size_t*, const size_t*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueNativeKernel)(cl_command_queue, void (CL_CALLBACK*) (void*), void*, size_t, cl_uint, const cl_mem*, const void**, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReadImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, void*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueTask)(cl_command_queue, cl_kernel, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueUnmapMemObject)(cl_command_queue, cl_mem, void*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWaitForEvents)(cl_command_queue, cl_uint, const cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBuffer)(cl_command_queue, cl_mem, cl_bool, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteBufferRect)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, const size_t*, size_t, size_t, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueWriteImage)(cl_command_queue, cl_mem, cl_bool, const size_t*, const size_t*, size_t, size_t, const void*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFinish)(cl_command_queue);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clFlush)(cl_command_queue);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetCommandQueueInfo)(cl_command_queue, cl_command_queue_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetContextInfo)(cl_context, cl_context_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceIDs)(cl_platform_id, cl_device_type, cl_uint, cl_device_id*, cl_uint*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetDeviceInfo)(cl_device_id, cl_device_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventInfo)(cl_event, cl_event_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetEventProfilingInfo)(cl_event, cl_profiling_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddress)(const char*);
extern CL_RUNTIME_EXPORT void* (CL_API_CALL*clGetExtensionFunctionAddressForPlatform)(cl_platform_id, const char*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetImageInfo)(cl_mem, cl_image_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelArgInfo)(cl_kernel, cl_uint, cl_kernel_arg_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelInfo)(cl_kernel, cl_kernel_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetKernelWorkGroupInfo)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetMemObjectInfo)(cl_mem, cl_mem_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformIDs)(cl_uint, cl_platform_id*, cl_uint*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetPlatformInfo)(cl_platform_id, cl_platform_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramBuildInfo)(cl_program, cl_device_id, cl_program_build_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetProgramInfo)(cl_program, cl_program_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSamplerInfo)(cl_sampler, cl_sampler_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetSupportedImageFormats)(cl_context, cl_mem_flags, cl_mem_object_type, cl_uint, cl_image_format*, cl_uint*);
extern CL_RUNTIME_EXPORT cl_program (CL_API_CALL*clLinkProgram)(cl_context, cl_uint, const cl_device_id*, const char*, cl_uint, const cl_program*, void (CL_CALLBACK*) (cl_program, void*), void*, cl_int*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseCommandQueue)(cl_command_queue);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseContext)(cl_context);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseDevice)(cl_device_id);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseEvent)(cl_event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseKernel)(cl_kernel);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseMemObject)(cl_mem);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseProgram)(cl_program);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clReleaseSampler)(cl_sampler);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainCommandQueue)(cl_command_queue);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainContext)(cl_context);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainDevice)(cl_device_id);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainEvent)(cl_event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainKernel)(cl_kernel);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainMemObject)(cl_mem);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainProgram)(cl_program);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clRetainSampler)(cl_sampler);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetEventCallback)(cl_event, cl_int, void (CL_CALLBACK*) (cl_event, cl_int, void*), void*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetKernelArg)(cl_kernel, cl_uint, size_t, const void*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetMemObjectDestructorCallback)(cl_mem, void (CL_CALLBACK*) (cl_mem, void*), void*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clSetUserEventStatus)(cl_event, cl_int);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadCompiler)();
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clUnloadPlatformCompiler)(cl_platform_id);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clWaitForEvents)(cl_uint, const cl_event*);
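// Together these pointers form the OpenCL dispatch table: they are filled in
// once the OpenCL shared library has been located and loaded at run time, and
// stay unresolved when no OpenCL runtime is present, which is how a build can
// enable OpenCL support yet still run on machines without it.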


@@ -0,0 +1,272 @@
//
// AUTOGENERATED, DO NOT EDIT
//
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP
#error "Invalid usage"
#endif
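// This wrappers header restores ordinary call syntax on top of the *_pfn
// pointers: each name is redefined to a thin inline _fn function that simply
// forwards its arguments. With it in effect, standard-looking code such as
//
//     cl_uint n = 0;
//     cl_int err = clGetPlatformIDs(0, NULL, &n);
//
// compiles against the usual OpenCL names but dispatches through the
// runtime-resolved pointers.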
// generated by parser_cl.py
#undef clBuildProgram
#define clBuildProgram clBuildProgram_fn
inline cl_int clBuildProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, void (CL_CALLBACK*p4) (cl_program, void*), void* p5) { return clBuildProgram_pfn(p0, p1, p2, p3, p4, p5); }
#undef clCompileProgram
#define clCompileProgram clCompileProgram_fn
inline cl_int clCompileProgram(cl_program p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, const char** p6, void (CL_CALLBACK*p7) (cl_program, void*), void* p8) { return clCompileProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clCreateBuffer
#define clCreateBuffer clCreateBuffer_fn
inline cl_mem clCreateBuffer(cl_context p0, cl_mem_flags p1, size_t p2, void* p3, cl_int* p4) { return clCreateBuffer_pfn(p0, p1, p2, p3, p4); }
#undef clCreateCommandQueue
#define clCreateCommandQueue clCreateCommandQueue_fn
inline cl_command_queue clCreateCommandQueue(cl_context p0, cl_device_id p1, cl_command_queue_properties p2, cl_int* p3) { return clCreateCommandQueue_pfn(p0, p1, p2, p3); }
#undef clCreateContext
#define clCreateContext clCreateContext_fn
inline cl_context clCreateContext(const cl_context_properties* p0, cl_uint p1, const cl_device_id* p2, void (CL_CALLBACK*p3) (const char*, const void*, size_t, void*), void* p4, cl_int* p5) { return clCreateContext_pfn(p0, p1, p2, p3, p4, p5); }
#undef clCreateContextFromType
#define clCreateContextFromType clCreateContextFromType_fn
inline cl_context clCreateContextFromType(const cl_context_properties* p0, cl_device_type p1, void (CL_CALLBACK*p2) (const char*, const void*, size_t, void*), void* p3, cl_int* p4) { return clCreateContextFromType_pfn(p0, p1, p2, p3, p4); }
#undef clCreateImage
#define clCreateImage clCreateImage_fn
inline cl_mem clCreateImage(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, const cl_image_desc* p3, void* p4, cl_int* p5) { return clCreateImage_pfn(p0, p1, p2, p3, p4, p5); }
#undef clCreateImage2D
#define clCreateImage2D clCreateImage2D_fn
inline cl_mem clCreateImage2D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, void* p6, cl_int* p7) { return clCreateImage2D_pfn(p0, p1, p2, p3, p4, p5, p6, p7); }
#undef clCreateImage3D
#define clCreateImage3D clCreateImage3D_fn
inline cl_mem clCreateImage3D(cl_context p0, cl_mem_flags p1, const cl_image_format* p2, size_t p3, size_t p4, size_t p5, size_t p6, size_t p7, void* p8, cl_int* p9) { return clCreateImage3D_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); }
#undef clCreateKernel
#define clCreateKernel clCreateKernel_fn
inline cl_kernel clCreateKernel(cl_program p0, const char* p1, cl_int* p2) { return clCreateKernel_pfn(p0, p1, p2); }
#undef clCreateKernelsInProgram
#define clCreateKernelsInProgram clCreateKernelsInProgram_fn
inline cl_int clCreateKernelsInProgram(cl_program p0, cl_uint p1, cl_kernel* p2, cl_uint* p3) { return clCreateKernelsInProgram_pfn(p0, p1, p2, p3); }
#undef clCreateProgramWithBinary
#define clCreateProgramWithBinary clCreateProgramWithBinary_fn
inline cl_program clCreateProgramWithBinary(cl_context p0, cl_uint p1, const cl_device_id* p2, const size_t* p3, const unsigned char** p4, cl_int* p5, cl_int* p6) { return clCreateProgramWithBinary_pfn(p0, p1, p2, p3, p4, p5, p6); }
#undef clCreateProgramWithBuiltInKernels
#define clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels_fn
inline cl_program clCreateProgramWithBuiltInKernels(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_int* p4) { return clCreateProgramWithBuiltInKernels_pfn(p0, p1, p2, p3, p4); }
#undef clCreateProgramWithSource
#define clCreateProgramWithSource clCreateProgramWithSource_fn
inline cl_program clCreateProgramWithSource(cl_context p0, cl_uint p1, const char** p2, const size_t* p3, cl_int* p4) { return clCreateProgramWithSource_pfn(p0, p1, p2, p3, p4); }
#undef clCreateSampler
#define clCreateSampler clCreateSampler_fn
inline cl_sampler clCreateSampler(cl_context p0, cl_bool p1, cl_addressing_mode p2, cl_filter_mode p3, cl_int* p4) { return clCreateSampler_pfn(p0, p1, p2, p3, p4); }
#undef clCreateSubBuffer
#define clCreateSubBuffer clCreateSubBuffer_fn
inline cl_mem clCreateSubBuffer(cl_mem p0, cl_mem_flags p1, cl_buffer_create_type p2, const void* p3, cl_int* p4) { return clCreateSubBuffer_pfn(p0, p1, p2, p3, p4); }
#undef clCreateSubDevices
#define clCreateSubDevices clCreateSubDevices_fn
inline cl_int clCreateSubDevices(cl_device_id p0, const cl_device_partition_property* p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clCreateSubDevices_pfn(p0, p1, p2, p3, p4); }
#undef clCreateUserEvent
#define clCreateUserEvent clCreateUserEvent_fn
inline cl_event clCreateUserEvent(cl_context p0, cl_int* p1) { return clCreateUserEvent_pfn(p0, p1); }
#undef clEnqueueBarrier
#define clEnqueueBarrier clEnqueueBarrier_fn
inline cl_int clEnqueueBarrier(cl_command_queue p0) { return clEnqueueBarrier_pfn(p0); }
#undef clEnqueueBarrierWithWaitList
#define clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList_fn
inline cl_int clEnqueueBarrierWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueBarrierWithWaitList_pfn(p0, p1, p2, p3); }
#undef clEnqueueCopyBuffer
#define clEnqueueCopyBuffer clEnqueueCopyBuffer_fn
inline cl_int clEnqueueCopyBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clEnqueueCopyBufferRect
#define clEnqueueCopyBufferRect clEnqueueCopyBufferRect_fn
inline cl_int clEnqueueCopyBufferRect(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, cl_uint p10, const cl_event* p11, cl_event* p12) { return clEnqueueCopyBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12); }
#undef clEnqueueCopyBufferToImage
#define clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage_fn
inline cl_int clEnqueueCopyBufferToImage(cl_command_queue p0, cl_mem p1, cl_mem p2, size_t p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyBufferToImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clEnqueueCopyImage
#define clEnqueueCopyImage clEnqueueCopyImage_fn
inline cl_int clEnqueueCopyImage(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clEnqueueCopyImageToBuffer
#define clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer_fn
inline cl_int clEnqueueCopyImageToBuffer(cl_command_queue p0, cl_mem p1, cl_mem p2, const size_t* p3, const size_t* p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueCopyImageToBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clEnqueueFillBuffer
#define clEnqueueFillBuffer clEnqueueFillBuffer_fn
inline cl_int clEnqueueFillBuffer(cl_command_queue p0, cl_mem p1, const void* p2, size_t p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueFillBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clEnqueueFillImage
#define clEnqueueFillImage clEnqueueFillImage_fn
inline cl_int clEnqueueFillImage(cl_command_queue p0, cl_mem p1, const void* p2, const size_t* p3, const size_t* p4, cl_uint p5, const cl_event* p6, cl_event* p7) { return clEnqueueFillImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7); }
#undef clEnqueueMapBuffer
#define clEnqueueMapBuffer clEnqueueMapBuffer_fn
inline void* clEnqueueMapBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, size_t p4, size_t p5, cl_uint p6, const cl_event* p7, cl_event* p8, cl_int* p9) { return clEnqueueMapBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); }
#undef clEnqueueMapImage
#define clEnqueueMapImage clEnqueueMapImage_fn
inline void* clEnqueueMapImage(cl_command_queue p0, cl_mem p1, cl_bool p2, cl_map_flags p3, const size_t* p4, const size_t* p5, size_t* p6, size_t* p7, cl_uint p8, const cl_event* p9, cl_event* p10, cl_int* p11) { return clEnqueueMapImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11); }
#undef clEnqueueMarker
#define clEnqueueMarker clEnqueueMarker_fn
inline cl_int clEnqueueMarker(cl_command_queue p0, cl_event* p1) { return clEnqueueMarker_pfn(p0, p1); }
#undef clEnqueueMarkerWithWaitList
#define clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList_fn
inline cl_int clEnqueueMarkerWithWaitList(cl_command_queue p0, cl_uint p1, const cl_event* p2, cl_event* p3) { return clEnqueueMarkerWithWaitList_pfn(p0, p1, p2, p3); }
#undef clEnqueueMigrateMemObjects
#define clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects_fn
inline cl_int clEnqueueMigrateMemObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_mem_migration_flags p3, cl_uint p4, const cl_event* p5, cl_event* p6) { return clEnqueueMigrateMemObjects_pfn(p0, p1, p2, p3, p4, p5, p6); }
#undef clEnqueueNDRangeKernel
#define clEnqueueNDRangeKernel clEnqueueNDRangeKernel_fn
inline cl_int clEnqueueNDRangeKernel(cl_command_queue p0, cl_kernel p1, cl_uint p2, const size_t* p3, const size_t* p4, const size_t* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueNDRangeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clEnqueueNativeKernel
#define clEnqueueNativeKernel clEnqueueNativeKernel_fn
inline cl_int clEnqueueNativeKernel(cl_command_queue p0, void (CL_CALLBACK*p1) (void*), void* p2, size_t p3, cl_uint p4, const cl_mem* p5, const void** p6, cl_uint p7, const cl_event* p8, cl_event* p9) { return clEnqueueNativeKernel_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); }
#undef clEnqueueReadBuffer
#define clEnqueueReadBuffer clEnqueueReadBuffer_fn
inline cl_int clEnqueueReadBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueReadBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clEnqueueReadBufferRect
#define clEnqueueReadBufferRect clEnqueueReadBufferRect_fn
inline cl_int clEnqueueReadBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueReadBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); }
#undef clEnqueueReadImage
#define clEnqueueReadImage clEnqueueReadImage_fn
inline cl_int clEnqueueReadImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueReadImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); }
#undef clEnqueueTask
#define clEnqueueTask clEnqueueTask_fn
inline cl_int clEnqueueTask(cl_command_queue p0, cl_kernel p1, cl_uint p2, const cl_event* p3, cl_event* p4) { return clEnqueueTask_pfn(p0, p1, p2, p3, p4); }
#undef clEnqueueUnmapMemObject
#define clEnqueueUnmapMemObject clEnqueueUnmapMemObject_fn
inline cl_int clEnqueueUnmapMemObject(cl_command_queue p0, cl_mem p1, void* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueUnmapMemObject_pfn(p0, p1, p2, p3, p4, p5); }
#undef clEnqueueWaitForEvents
#define clEnqueueWaitForEvents clEnqueueWaitForEvents_fn
inline cl_int clEnqueueWaitForEvents(cl_command_queue p0, cl_uint p1, const cl_event* p2) { return clEnqueueWaitForEvents_pfn(p0, p1, p2); }
#undef clEnqueueWriteBuffer
#define clEnqueueWriteBuffer clEnqueueWriteBuffer_fn
inline cl_int clEnqueueWriteBuffer(cl_command_queue p0, cl_mem p1, cl_bool p2, size_t p3, size_t p4, const void* p5, cl_uint p6, const cl_event* p7, cl_event* p8) { return clEnqueueWriteBuffer_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clEnqueueWriteBufferRect
#define clEnqueueWriteBufferRect clEnqueueWriteBufferRect_fn
inline cl_int clEnqueueWriteBufferRect(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, const size_t* p5, size_t p6, size_t p7, size_t p8, size_t p9, const void* p10, cl_uint p11, const cl_event* p12, cl_event* p13) { return clEnqueueWriteBufferRect_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13); }
#undef clEnqueueWriteImage
#define clEnqueueWriteImage clEnqueueWriteImage_fn
inline cl_int clEnqueueWriteImage(cl_command_queue p0, cl_mem p1, cl_bool p2, const size_t* p3, const size_t* p4, size_t p5, size_t p6, const void* p7, cl_uint p8, const cl_event* p9, cl_event* p10) { return clEnqueueWriteImage_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); }
#undef clFinish
#define clFinish clFinish_fn
inline cl_int clFinish(cl_command_queue p0) { return clFinish_pfn(p0); }
#undef clFlush
#define clFlush clFlush_fn
inline cl_int clFlush(cl_command_queue p0) { return clFlush_pfn(p0); }
#undef clGetCommandQueueInfo
#define clGetCommandQueueInfo clGetCommandQueueInfo_fn
inline cl_int clGetCommandQueueInfo(cl_command_queue p0, cl_command_queue_info p1, size_t p2, void* p3, size_t* p4) { return clGetCommandQueueInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetContextInfo
#define clGetContextInfo clGetContextInfo_fn
inline cl_int clGetContextInfo(cl_context p0, cl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetContextInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetDeviceIDs
#define clGetDeviceIDs clGetDeviceIDs_fn
inline cl_int clGetDeviceIDs(cl_platform_id p0, cl_device_type p1, cl_uint p2, cl_device_id* p3, cl_uint* p4) { return clGetDeviceIDs_pfn(p0, p1, p2, p3, p4); }
#undef clGetDeviceInfo
#define clGetDeviceInfo clGetDeviceInfo_fn
inline cl_int clGetDeviceInfo(cl_device_id p0, cl_device_info p1, size_t p2, void* p3, size_t* p4) { return clGetDeviceInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetEventInfo
#define clGetEventInfo clGetEventInfo_fn
inline cl_int clGetEventInfo(cl_event p0, cl_event_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetEventProfilingInfo
#define clGetEventProfilingInfo clGetEventProfilingInfo_fn
inline cl_int clGetEventProfilingInfo(cl_event p0, cl_profiling_info p1, size_t p2, void* p3, size_t* p4) { return clGetEventProfilingInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetExtensionFunctionAddress
#define clGetExtensionFunctionAddress clGetExtensionFunctionAddress_fn
inline void* clGetExtensionFunctionAddress(const char* p0) { return clGetExtensionFunctionAddress_pfn(p0); }
#undef clGetExtensionFunctionAddressForPlatform
#define clGetExtensionFunctionAddressForPlatform clGetExtensionFunctionAddressForPlatform_fn
inline void* clGetExtensionFunctionAddressForPlatform(cl_platform_id p0, const char* p1) { return clGetExtensionFunctionAddressForPlatform_pfn(p0, p1); }
#undef clGetImageInfo
#define clGetImageInfo clGetImageInfo_fn
inline cl_int clGetImageInfo(cl_mem p0, cl_image_info p1, size_t p2, void* p3, size_t* p4) { return clGetImageInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetKernelArgInfo
#define clGetKernelArgInfo clGetKernelArgInfo_fn
inline cl_int clGetKernelArgInfo(cl_kernel p0, cl_uint p1, cl_kernel_arg_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelArgInfo_pfn(p0, p1, p2, p3, p4, p5); }
#undef clGetKernelInfo
#define clGetKernelInfo clGetKernelInfo_fn
inline cl_int clGetKernelInfo(cl_kernel p0, cl_kernel_info p1, size_t p2, void* p3, size_t* p4) { return clGetKernelInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetKernelWorkGroupInfo
#define clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo_fn
inline cl_int clGetKernelWorkGroupInfo(cl_kernel p0, cl_device_id p1, cl_kernel_work_group_info p2, size_t p3, void* p4, size_t* p5) { return clGetKernelWorkGroupInfo_pfn(p0, p1, p2, p3, p4, p5); }
#undef clGetMemObjectInfo
#define clGetMemObjectInfo clGetMemObjectInfo_fn
inline cl_int clGetMemObjectInfo(cl_mem p0, cl_mem_info p1, size_t p2, void* p3, size_t* p4) { return clGetMemObjectInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetPlatformIDs
#define clGetPlatformIDs clGetPlatformIDs_fn
inline cl_int clGetPlatformIDs(cl_uint p0, cl_platform_id* p1, cl_uint* p2) { return clGetPlatformIDs_pfn(p0, p1, p2); }
#undef clGetPlatformInfo
#define clGetPlatformInfo clGetPlatformInfo_fn
inline cl_int clGetPlatformInfo(cl_platform_id p0, cl_platform_info p1, size_t p2, void* p3, size_t* p4) { return clGetPlatformInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetProgramBuildInfo
#define clGetProgramBuildInfo clGetProgramBuildInfo_fn
inline cl_int clGetProgramBuildInfo(cl_program p0, cl_device_id p1, cl_program_build_info p2, size_t p3, void* p4, size_t* p5) { return clGetProgramBuildInfo_pfn(p0, p1, p2, p3, p4, p5); }
#undef clGetProgramInfo
#define clGetProgramInfo clGetProgramInfo_fn
inline cl_int clGetProgramInfo(cl_program p0, cl_program_info p1, size_t p2, void* p3, size_t* p4) { return clGetProgramInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetSamplerInfo
#define clGetSamplerInfo clGetSamplerInfo_fn
inline cl_int clGetSamplerInfo(cl_sampler p0, cl_sampler_info p1, size_t p2, void* p3, size_t* p4) { return clGetSamplerInfo_pfn(p0, p1, p2, p3, p4); }
#undef clGetSupportedImageFormats
#define clGetSupportedImageFormats clGetSupportedImageFormats_fn
inline cl_int clGetSupportedImageFormats(cl_context p0, cl_mem_flags p1, cl_mem_object_type p2, cl_uint p3, cl_image_format* p4, cl_uint* p5) { return clGetSupportedImageFormats_pfn(p0, p1, p2, p3, p4, p5); }
#undef clLinkProgram
#define clLinkProgram clLinkProgram_fn
inline cl_program clLinkProgram(cl_context p0, cl_uint p1, const cl_device_id* p2, const char* p3, cl_uint p4, const cl_program* p5, void (CL_CALLBACK*p6) (cl_program, void*), void* p7, cl_int* p8) { return clLinkProgram_pfn(p0, p1, p2, p3, p4, p5, p6, p7, p8); }
#undef clReleaseCommandQueue
#define clReleaseCommandQueue clReleaseCommandQueue_fn
inline cl_int clReleaseCommandQueue(cl_command_queue p0) { return clReleaseCommandQueue_pfn(p0); }
#undef clReleaseContext
#define clReleaseContext clReleaseContext_fn
inline cl_int clReleaseContext(cl_context p0) { return clReleaseContext_pfn(p0); }
#undef clReleaseDevice
#define clReleaseDevice clReleaseDevice_fn
inline cl_int clReleaseDevice(cl_device_id p0) { return clReleaseDevice_pfn(p0); }
#undef clReleaseEvent
#define clReleaseEvent clReleaseEvent_fn
inline cl_int clReleaseEvent(cl_event p0) { return clReleaseEvent_pfn(p0); }
#undef clReleaseKernel
#define clReleaseKernel clReleaseKernel_fn
inline cl_int clReleaseKernel(cl_kernel p0) { return clReleaseKernel_pfn(p0); }
#undef clReleaseMemObject
#define clReleaseMemObject clReleaseMemObject_fn
inline cl_int clReleaseMemObject(cl_mem p0) { return clReleaseMemObject_pfn(p0); }
#undef clReleaseProgram
#define clReleaseProgram clReleaseProgram_fn
inline cl_int clReleaseProgram(cl_program p0) { return clReleaseProgram_pfn(p0); }
#undef clReleaseSampler
#define clReleaseSampler clReleaseSampler_fn
inline cl_int clReleaseSampler(cl_sampler p0) { return clReleaseSampler_pfn(p0); }
#undef clRetainCommandQueue
#define clRetainCommandQueue clRetainCommandQueue_fn
inline cl_int clRetainCommandQueue(cl_command_queue p0) { return clRetainCommandQueue_pfn(p0); }
#undef clRetainContext
#define clRetainContext clRetainContext_fn
inline cl_int clRetainContext(cl_context p0) { return clRetainContext_pfn(p0); }
#undef clRetainDevice
#define clRetainDevice clRetainDevice_fn
inline cl_int clRetainDevice(cl_device_id p0) { return clRetainDevice_pfn(p0); }
#undef clRetainEvent
#define clRetainEvent clRetainEvent_fn
inline cl_int clRetainEvent(cl_event p0) { return clRetainEvent_pfn(p0); }
#undef clRetainKernel
#define clRetainKernel clRetainKernel_fn
inline cl_int clRetainKernel(cl_kernel p0) { return clRetainKernel_pfn(p0); }
#undef clRetainMemObject
#define clRetainMemObject clRetainMemObject_fn
inline cl_int clRetainMemObject(cl_mem p0) { return clRetainMemObject_pfn(p0); }
#undef clRetainProgram
#define clRetainProgram clRetainProgram_fn
inline cl_int clRetainProgram(cl_program p0) { return clRetainProgram_pfn(p0); }
#undef clRetainSampler
#define clRetainSampler clRetainSampler_fn
inline cl_int clRetainSampler(cl_sampler p0) { return clRetainSampler_pfn(p0); }
#undef clSetEventCallback
#define clSetEventCallback clSetEventCallback_fn
inline cl_int clSetEventCallback(cl_event p0, cl_int p1, void (CL_CALLBACK*p2) (cl_event, cl_int, void*), void* p3) { return clSetEventCallback_pfn(p0, p1, p2, p3); }
#undef clSetKernelArg
#define clSetKernelArg clSetKernelArg_fn
inline cl_int clSetKernelArg(cl_kernel p0, cl_uint p1, size_t p2, const void* p3) { return clSetKernelArg_pfn(p0, p1, p2, p3); }
#undef clSetMemObjectDestructorCallback
#define clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback_fn
inline cl_int clSetMemObjectDestructorCallback(cl_mem p0, void (CL_CALLBACK*p1) (cl_mem, void*), void* p2) { return clSetMemObjectDestructorCallback_pfn(p0, p1, p2); }
#undef clSetUserEventStatus
#define clSetUserEventStatus clSetUserEventStatus_fn
inline cl_int clSetUserEventStatus(cl_event p0, cl_int p1) { return clSetUserEventStatus_pfn(p0, p1); }
#undef clUnloadCompiler
#define clUnloadCompiler clUnloadCompiler_fn
inline cl_int clUnloadCompiler() { return clUnloadCompiler_pfn(); }
#undef clUnloadPlatformCompiler
#define clUnloadPlatformCompiler clUnloadPlatformCompiler_fn
inline cl_int clUnloadPlatformCompiler(cl_platform_id p0) { return clUnloadPlatformCompiler_pfn(p0); }
#undef clWaitForEvents
#define clWaitForEvents clWaitForEvents_fn
inline cl_int clWaitForEvents(cl_uint p0, const cl_event* p1) { return clWaitForEvents_pfn(p0, p1); }


@@ -0,0 +1,62 @@
//
// AUTOGENERATED, DO NOT EDIT
//
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP
#error "Invalid usage"
#endif
// generated by parser_cl.py
#define clCreateFromGLBuffer clCreateFromGLBuffer_
#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_
#define clCreateFromGLTexture clCreateFromGLTexture_
#define clCreateFromGLTexture2D clCreateFromGLTexture2D_
#define clCreateFromGLTexture3D clCreateFromGLTexture3D_
#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_
#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_
#define clGetGLObjectInfo clGetGLObjectInfo_
#define clGetGLTextureInfo clGetGLTextureInfo_
#if defined __APPLE__
#include <OpenCL/cl_gl.h>
#else
#include <CL/cl_gl.h>
#endif
// generated by parser_cl.py
#undef clCreateFromGLBuffer
#define clCreateFromGLBuffer clCreateFromGLBuffer_pfn
#undef clCreateFromGLRenderbuffer
#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_pfn
#undef clCreateFromGLTexture
#define clCreateFromGLTexture clCreateFromGLTexture_pfn
#undef clCreateFromGLTexture2D
#define clCreateFromGLTexture2D clCreateFromGLTexture2D_pfn
#undef clCreateFromGLTexture3D
#define clCreateFromGLTexture3D clCreateFromGLTexture3D_pfn
#undef clEnqueueAcquireGLObjects
#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_pfn
#undef clEnqueueReleaseGLObjects
#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_pfn
#undef clGetGLContextInfoKHR
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_pfn
#undef clGetGLObjectInfo
#define clGetGLObjectInfo clGetGLObjectInfo_pfn
#undef clGetGLTextureInfo
#define clGetGLTextureInfo clGetGLTextureInfo_pfn
#ifdef cl_khr_gl_sharing
// generated by parser_cl.py
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLBuffer)(cl_context, cl_mem_flags, cl_GLuint, int*);
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLRenderbuffer)(cl_context, cl_mem_flags, cl_GLuint, cl_int*);
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*);
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture2D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*);
extern CL_RUNTIME_EXPORT cl_mem (CL_API_CALL*clCreateFromGLTexture3D)(cl_context, cl_mem_flags, cl_GLenum, cl_GLint, cl_GLuint, cl_int*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueAcquireGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clEnqueueReleaseGLObjects)(cl_command_queue, cl_uint, const cl_mem*, cl_uint, const cl_event*, cl_event*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLContextInfoKHR)(const cl_context_properties*, cl_gl_context_info, size_t, void*, size_t*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLObjectInfo)(cl_mem, cl_gl_object_type*, cl_GLuint*);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL*clGetGLTextureInfo)(cl_mem, cl_gl_texture_info, size_t, void*, size_t*);
#endif // cl_khr_gl_sharing


@@ -0,0 +1,42 @@
//
// AUTOGENERATED, DO NOT EDIT
//
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP
#error "Invalid usage"
#endif
#ifdef cl_khr_gl_sharing
// generated by parser_cl.py
#undef clCreateFromGLBuffer
#define clCreateFromGLBuffer clCreateFromGLBuffer_fn
inline cl_mem clCreateFromGLBuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, int* p3) { return clCreateFromGLBuffer_pfn(p0, p1, p2, p3); }
#undef clCreateFromGLRenderbuffer
#define clCreateFromGLRenderbuffer clCreateFromGLRenderbuffer_fn
inline cl_mem clCreateFromGLRenderbuffer(cl_context p0, cl_mem_flags p1, cl_GLuint p2, cl_int* p3) { return clCreateFromGLRenderbuffer_pfn(p0, p1, p2, p3); }
#undef clCreateFromGLTexture
#define clCreateFromGLTexture clCreateFromGLTexture_fn
inline cl_mem clCreateFromGLTexture(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture_pfn(p0, p1, p2, p3, p4, p5); }
#undef clCreateFromGLTexture2D
#define clCreateFromGLTexture2D clCreateFromGLTexture2D_fn
inline cl_mem clCreateFromGLTexture2D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture2D_pfn(p0, p1, p2, p3, p4, p5); }
#undef clCreateFromGLTexture3D
#define clCreateFromGLTexture3D clCreateFromGLTexture3D_fn
inline cl_mem clCreateFromGLTexture3D(cl_context p0, cl_mem_flags p1, cl_GLenum p2, cl_GLint p3, cl_GLuint p4, cl_int* p5) { return clCreateFromGLTexture3D_pfn(p0, p1, p2, p3, p4, p5); }
#undef clEnqueueAcquireGLObjects
#define clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects_fn
inline cl_int clEnqueueAcquireGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueAcquireGLObjects_pfn(p0, p1, p2, p3, p4, p5); }
#undef clEnqueueReleaseGLObjects
#define clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects_fn
inline cl_int clEnqueueReleaseGLObjects(cl_command_queue p0, cl_uint p1, const cl_mem* p2, cl_uint p3, const cl_event* p4, cl_event* p5) { return clEnqueueReleaseGLObjects_pfn(p0, p1, p2, p3, p4, p5); }
#undef clGetGLContextInfoKHR
#define clGetGLContextInfoKHR clGetGLContextInfoKHR_fn
inline cl_int clGetGLContextInfoKHR(const cl_context_properties* p0, cl_gl_context_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLContextInfoKHR_pfn(p0, p1, p2, p3, p4); }
#undef clGetGLObjectInfo
#define clGetGLObjectInfo clGetGLObjectInfo_fn
inline cl_int clGetGLObjectInfo(cl_mem p0, cl_gl_object_type* p1, cl_GLuint* p2) { return clGetGLObjectInfo_pfn(p0, p1, p2); }
#undef clGetGLTextureInfo
#define clGetGLTextureInfo clGetGLTextureInfo_fn
inline cl_int clGetGLTextureInfo(cl_mem p0, cl_gl_texture_info p1, size_t p2, void* p3, size_t* p4) { return clGetGLTextureInfo_pfn(p0, p1, p2, p3, p4); }
#endif // cl_khr_gl_sharing


@@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
#define OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP
#ifdef HAVE_CLAMDBLAS
#include "opencl_core.hpp"
#include "autogenerated/opencl_clamdblas.hpp"
#endif // HAVE_CLAMDBLAS
#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDBLAS_HPP


@@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
#define OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP
#ifdef HAVE_CLAMDFFT
#include "opencl_core.hpp"
#include "autogenerated/opencl_clamdfft.hpp"
#endif // HAVE_CLAMDFFT
#endif // OPENCV_CORE_OCL_RUNTIME_CLAMDFFT_HPP


@@ -0,0 +1,84 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP
#define OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP
#ifdef HAVE_OPENCL
#ifndef CL_RUNTIME_EXPORT
#if (defined(BUILD_SHARED_LIBS) || defined(OPENCV_CORE_SHARED)) && (defined _WIN32 || defined WINCE) && \
!(defined(__OPENCV_BUILD) && defined(OPENCV_MODULE_IS_PART_OF_WORLD))
#define CL_RUNTIME_EXPORT __declspec(dllimport)
#else
#define CL_RUNTIME_EXPORT
#endif
#endif
#ifdef HAVE_OPENCL_SVM
#define clSVMAlloc clSVMAlloc_
#define clSVMFree clSVMFree_
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
#define clSetKernelExecInfo clSetKernelExecInfo_
#define clEnqueueSVMFree clEnqueueSVMFree_
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
#define clEnqueueSVMMap clEnqueueSVMMap_
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
#endif
#include "autogenerated/opencl_core.hpp"
#ifndef CL_DEVICE_DOUBLE_FP_CONFIG
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
#endif
#ifndef CL_DEVICE_HALF_FP_CONFIG
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
#endif
#ifndef CL_VERSION_1_2
#define CV_REQUIRE_OPENCL_1_2_ERROR CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "OpenCV compiled without OpenCL v1.2 support, so we can't use functionality from OpenCL v1.2")
#endif
#endif // HAVE_OPENCL
#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_CORE_HPP


@@ -0,0 +1,47 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP
#define OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP
#include "autogenerated/opencl_core_wrappers.hpp"
#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_WRAPPERS_HPP


@@ -0,0 +1,53 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP
#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP
#if defined HAVE_OPENCL && defined HAVE_OPENGL
#include "opencl_core.hpp"
#include "autogenerated/opencl_gl.hpp"
#endif // defined HAVE_OPENCL && defined HAVE_OPENGL
#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_HPP


@@ -0,0 +1,47 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the OpenCV Foundation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP
#define OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP
#include "autogenerated/opencl_gl_wrappers.hpp"
#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_GL_WRAPPERS_HPP


@@ -0,0 +1,48 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP
#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#include "opencl_svm_definitions.hpp"
#undef clSVMAlloc
#define clSVMAlloc clSVMAlloc_pfn
#undef clSVMFree
#define clSVMFree clSVMFree_pfn
#undef clSetKernelArgSVMPointer
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
#undef clSetKernelExecInfo
//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
#undef clEnqueueSVMFree
//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
#undef clEnqueueSVMMemcpy
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
#undef clEnqueueSVMMemFill
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
#undef clEnqueueSVMMap
#define clEnqueueSVMMap clEnqueueSVMMap_pfn
#undef clEnqueueSVMUnmap
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment);
extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value);
//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
#endif // HAVE_OPENCL_SVM
#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP


@@ -0,0 +1,42 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP
#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP
#if defined(HAVE_OPENCL_SVM)
#if defined(CL_VERSION_2_0)
// OpenCL 2.0 contains SVM definitions
#else
typedef cl_bitfield cl_device_svm_capabilities;
typedef cl_bitfield cl_svm_mem_flags;
typedef cl_uint cl_kernel_exec_info;
//
// TODO Add real values after OpenCL 2.0 release
//
#ifndef CL_DEVICE_SVM_CAPABILITIES
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
#endif
#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
#endif
#ifndef CL_MEM_SVM_ATOMICS
#define CL_MEM_SVM_ATOMICS (1 << 11)
#endif
#endif // CL_VERSION_2_0
#endif // HAVE_OPENCL_SVM
#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP


@@ -0,0 +1,166 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP
#define OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
//
// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
// Below is the original copyright.
//
/*******************************************************************************
* Copyright (c) 2008-2013 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*******************************************
* Shared Virtual Memory (SVM) extension
*******************************************/
typedef cl_bitfield cl_device_svm_capabilities_amd;
typedef cl_bitfield cl_svm_mem_flags_amd;
typedef cl_uint cl_kernel_exec_info_amd;
/* cl_device_info */
#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054
/* cl_device_svm_capabilities_amd */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3)
/* cl_svm_mem_flags_amd */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10)
#define CL_MEM_SVM_ATOMICS_AMD (1 << 11)
/* cl_mem_info */
#define CL_MEM_USES_SVM_POINTER_AMD 0x1109
/* cl_kernel_exec_info_amd */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7
/* cl_command_type */
#define CL_COMMAND_SVM_FREE_AMD 0x1209
#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A
#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B
#define CL_COMMAND_SVM_MAP_AMD 0x120C
#define CL_COMMAND_SVM_UNMAP_AMD 0x120D
typedef CL_API_ENTRY void*
(CL_API_CALL * clSVMAllocAMD_fn)(
cl_context /* context */,
cl_svm_mem_flags_amd /* flags */,
size_t /* size */,
unsigned int /* alignment */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY void
(CL_API_CALL * clSVMFreeAMD_fn)(
cl_context /* context */,
void* /* svm_pointer */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMFreeAMD_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_svm_pointers */,
void** /* svm_pointers */,
void (CL_CALLBACK *)( /*pfn_free_func*/
cl_command_queue /* queue */,
cl_uint /* num_svm_pointers */,
void** /* svm_pointers */,
void* /* user_data */),
void* /* user_data */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)(
cl_command_queue /* command_queue */,
cl_bool /* blocking_copy */,
void* /* dst_ptr */,
const void* /* src_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)(
cl_command_queue /* command_queue */,
void* /* svm_ptr */,
const void* /* pattern */,
size_t /* pattern_size */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMapAMD_fn)(
cl_command_queue /* command_queue */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
void* /* svm_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)(
cl_command_queue /* command_queue */,
void* /* svm_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)(
cl_kernel /* kernel */,
cl_uint /* arg_index */,
const void * /* arg_value */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clSetKernelExecInfoAMD_fn)(
cl_kernel /* kernel */,
cl_kernel_exec_info_amd /* param_name */,
size_t /* param_value_size */,
const void * /* param_value */
) CL_EXT_SUFFIX__VERSION_1_2;
#endif
#endif // HAVE_OPENCL_SVM
#endif // OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP


@@ -0,0 +1,729 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef OPENCV_CORE_OPENGL_HPP
#define OPENCV_CORE_OPENGL_HPP
#ifndef __cplusplus
# error opengl.hpp header must be compiled as C++
#endif
#include "opencv2/core.hpp"
#include "ocl.hpp"
namespace cv { namespace ogl {
/** @addtogroup core_opengl
This section describes OpenGL interoperability.
To enable OpenGL support, configure OpenCV using CMake with WITH_OPENGL=ON. Currently OpenGL is
supported only with the WIN32, GTK and Qt backends on Windows and Linux (MacOS and Android are not
supported). For the GTK backend the gtkglext-1.0 library is required.
To use OpenGL functionality you should first create an OpenGL context (window or frame buffer). You
can do this with the namedWindow function or with another OpenGL toolkit (GLUT, for example).
*/
//! @{
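// A minimal context-creation sketch (assumes OpenCV built with WITH_OPENGL=ON
// and the highgui module available; the window name "gl" is illustrative):
//   cv::namedWindow("gl", cv::WINDOW_OPENGL);  // window that owns a GL context
//   cv::resizeWindow("gl", 640, 480);
//   // ogl::Buffer / ogl::Texture2D objects can be created once the context exists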
/////////////////// OpenGL Objects ///////////////////
/** @brief Smart pointer for OpenGL buffer object with reference counting.
Buffer Objects are OpenGL objects that store an array of unformatted memory allocated by the OpenGL
context. These can be used to store vertex data, pixel data retrieved from images or the
framebuffer, and a variety of other things.
ogl::Buffer has interface similar with Mat interface and represents 2D array memory.
ogl::Buffer supports memory transfers between host and device and also can be mapped to CUDA memory.
*/
class CV_EXPORTS Buffer
{
public:
/** @brief The target defines how you intend to use the buffer object.
*/
enum Target
{
ARRAY_BUFFER = 0x8892, //!< The buffer will be used as a source for vertex data
ELEMENT_ARRAY_BUFFER = 0x8893, //!< The buffer will be used for indices (in glDrawElements, for example)
PIXEL_PACK_BUFFER = 0x88EB, //!< The buffer will be used for reading from OpenGL textures
PIXEL_UNPACK_BUFFER = 0x88EC //!< The buffer will be used for writing to OpenGL textures
};
enum Access
{
READ_ONLY = 0x88B8,
WRITE_ONLY = 0x88B9,
READ_WRITE = 0x88BA
};
/** @brief The constructors.
Creates an empty ogl::Buffer object, creates an ogl::Buffer object from an existing buffer ( abufId
parameter), allocates memory for the ogl::Buffer object or copies from host/device memory.
*/
Buffer();
/** @overload
@param arows Number of rows in a 2D array.
@param acols Number of columns in a 2D array.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param abufId Buffer object name.
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Buffer(int arows, int acols, int atype, unsigned int abufId, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param abufId Buffer object name.
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Buffer(Size asize, int atype, unsigned int abufId, bool autoRelease = false);
/** @overload
@param arows Number of rows in a 2D array.
@param acols Number of columns in a 2D array.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Buffer(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Buffer(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @overload
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
explicit Buffer(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @brief Allocates memory for ogl::Buffer object.
@param arows Number of rows in a 2D array.
@param acols Number of columns in a 2D array.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void create(int arows, int acols, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param atype Array type ( CV_8UC1, ..., CV_64FC4 ). See Mat for details.
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void create(Size asize, int atype, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @brief Decrements the reference counter and destroys the buffer object if needed.
The function will call setAutoRelease(true) .
*/
void release();
/** @brief Sets auto release mode.
The lifetime of the OpenGL object is tied to the lifetime of the context. If the OpenGL context was
bound to a window it could be released at any time (the user can close the window). If the object's
destructor is called after destruction of the context it will cause an error. Thus ogl::Buffer doesn't
destroy the OpenGL object in its destructor by default (all OpenGL resources are released together with
the OpenGL context). This function can force the ogl::Buffer destructor to destroy the OpenGL object.
@param flag Auto release mode (if true, release will be called in object's destructor).
*/
void setAutoRelease(bool flag);
/** @brief Copies from host/device memory to OpenGL buffer.
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or std::vector ).
@param target Buffer usage. See cv::ogl::Buffer::Target .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void copyFrom(InputArray arr, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @overload */
void copyFrom(InputArray arr, cuda::Stream& stream, Target target = ARRAY_BUFFER, bool autoRelease = false);
/** @brief Copies from OpenGL buffer to host/device memory or another OpenGL buffer object.
@param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , std::vector or
ogl::Buffer ).
*/
void copyTo(OutputArray arr) const;
/** @overload */
void copyTo(OutputArray arr, cuda::Stream& stream) const;
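// A minimal host round-trip sketch using copyFrom/copyTo (assumes a current
// OpenGL context; all names are illustrative):
//   cv::Mat src(480, 640, CV_8UC3, cv::Scalar(0, 255, 0));
//   cv::ogl::Buffer buf;
//   buf.copyFrom(src, cv::ogl::Buffer::PIXEL_UNPACK_BUFFER);  // host -> GL buffer
//   cv::Mat dst;
//   buf.copyTo(dst);                                          // GL buffer -> host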
/** @brief Creates a full copy of the buffer object and the underlying data.
@param target Buffer usage for destination buffer.
@param autoRelease Auto release mode for destination buffer.
*/
Buffer clone(Target target = ARRAY_BUFFER, bool autoRelease = false) const;
/** @brief Binds OpenGL buffer to the specified buffer binding point.
@param target Binding point. See cv::ogl::Buffer::Target .
*/
void bind(Target target) const;
/** @brief Unbind any buffers from the specified binding point.
@param target Binding point. See cv::ogl::Buffer::Target .
*/
static void unbind(Target target);
/** @brief Maps OpenGL buffer to host memory.
mapHost maps the entire data store of the buffer object into the client's address space. The data
can then be directly read and/or written relative to the returned pointer, depending on the specified
access policy.
A mapped data store must be unmapped with ogl::Buffer::unmapHost before its buffer object is used.
This operation can lead to memory transfers between host and device.
Only one buffer object can be mapped at a time.
@param access Access policy, indicating whether it will be possible to read from, write to, or both
read from and write to the buffer object's mapped data store. The symbolic constant must be
ogl::Buffer::READ_ONLY , ogl::Buffer::WRITE_ONLY or ogl::Buffer::READ_WRITE .
*/
Mat mapHost(Access access);
/** @brief Unmaps OpenGL buffer.
*/
void unmapHost();
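// A minimal mapping sketch (assumes a current OpenGL context; `buf` is
// illustrative): map, touch the data through the returned Mat, then unmap
// before the buffer is used by OpenGL again.
//   cv::ogl::Buffer buf(480, 640, CV_8UC3);
//   cv::Mat m = buf.mapHost(cv::ogl::Buffer::READ_WRITE);
//   m.setTo(cv::Scalar::all(0));  // direct read/write through the mapping
//   buf.unmapHost();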
//! map to device memory (blocking)
cuda::GpuMat mapDevice();
void unmapDevice();
/** @brief Maps OpenGL buffer to CUDA device memory.
This operation doesn't copy data. Several buffer objects can be mapped to CUDA memory at a time.
A mapped data store must be unmapped with ogl::Buffer::unmapDevice before its buffer object is used.
*/
cuda::GpuMat mapDevice(cuda::Stream& stream);
/** @brief Unmaps OpenGL buffer.
*/
void unmapDevice(cuda::Stream& stream);
int rows() const;
int cols() const;
Size size() const;
bool empty() const;
int type() const;
int depth() const;
int channels() const;
int elemSize() const;
int elemSize1() const;
//! get OpenGL object id
unsigned int bufId() const;
class Impl;
private:
Ptr<Impl> impl_;
int rows_;
int cols_;
int type_;
};
/** @brief Smart pointer for OpenGL 2D texture memory with reference counting.
*/
class CV_EXPORTS Texture2D
{
public:
/** @brief An Image Format describes the way that the images in Textures store their data.
*/
enum Format
{
NONE = 0,
DEPTH_COMPONENT = 0x1902, //!< Depth
RGB = 0x1907, //!< Red, Green, Blue
RGBA = 0x1908 //!< Red, Green, Blue, Alpha
};
/** @brief The constructors.
Creates an empty ogl::Texture2D object, allocates memory for the ogl::Texture2D object or copies
from host/device memory.
*/
Texture2D();
/** @overload */
Texture2D(int arows, int acols, Format aformat, unsigned int atexId, bool autoRelease = false);
/** @overload */
Texture2D(Size asize, Format aformat, unsigned int atexId, bool autoRelease = false);
/** @overload
@param arows Number of rows.
@param acols Number of columns.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Texture2D(int arows, int acols, Format aformat, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
Texture2D(Size asize, Format aformat, bool autoRelease = false);
/** @overload
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
explicit Texture2D(InputArray arr, bool autoRelease = false);
/** @brief Allocates memory for ogl::Texture2D object.
@param arows Number of rows.
@param acols Number of columns.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void create(int arows, int acols, Format aformat, bool autoRelease = false);
/** @overload
@param asize 2D array size.
@param aformat Image format. See cv::ogl::Texture2D::Format .
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void create(Size asize, Format aformat, bool autoRelease = false);
/** @brief Decrements the reference counter and destroys the texture object if needed.
The function will call setAutoRelease(true) .
*/
void release();
/** @brief Sets auto release mode.
@param flag Auto release mode (if true, release will be called in object's destructor).
The lifetime of the OpenGL object is tied to the lifetime of the context. If the OpenGL context was
bound to a window it could be released at any time (the user can close the window). If the object's
destructor is called after destruction of the context it will cause an error. Thus ogl::Texture2D
doesn't destroy the OpenGL object in its destructor by default (all OpenGL resources are released
together with the OpenGL context). This function can force the ogl::Texture2D destructor to destroy
the OpenGL object.
*/
void setAutoRelease(bool flag);
/** @brief Copies from host/device memory to OpenGL texture.
@param arr Input array (host or device memory, it can be Mat , cuda::GpuMat or ogl::Buffer ).
@param autoRelease Auto release mode (if true, release will be called in object's destructor).
*/
void copyFrom(InputArray arr, bool autoRelease = false);
/** @brief Copies from OpenGL texture to host/device memory or another OpenGL texture object.
@param arr Destination array (host or device memory, can be Mat , cuda::GpuMat , ogl::Buffer or
ogl::Texture2D ).
@param ddepth Destination depth.
@param autoRelease Auto release mode for destination buffer (if arr is OpenGL buffer or texture).
*/
void copyTo(OutputArray arr, int ddepth = CV_32F, bool autoRelease = false) const;
/** @brief Binds texture to current active texture unit for GL_TEXTURE_2D target.
*/
void bind() const;
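// A minimal upload-and-bind sketch (assumes a current OpenGL context; the
// image path is illustrative):
//   cv::Mat img = cv::imread("image.png");
//   cv::ogl::Texture2D tex;
//   tex.copyFrom(img);  // host -> GL texture
//   tex.bind();         // bind to the active texture unit (GL_TEXTURE_2D)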
int rows() const;
int cols() const;
Size size() const;
bool empty() const;
Format format() const;
//! get OpenGL object id
unsigned int texId() const;
class Impl;
private:
Ptr<Impl> impl_;
int rows_;
int cols_;
Format format_;
};
/** @brief Wrapper for OpenGL Client-Side Vertex arrays.
ogl::Arrays stores vertex data in ogl::Buffer objects.
*/
class CV_EXPORTS Arrays
{
public:
/** @brief Default constructor
*/
Arrays();
/** @brief Sets an array of vertex coordinates.
@param vertex array with vertex coordinates, can be both host and device memory.
*/
void setVertexArray(InputArray vertex);
/** @brief Resets vertex coordinates.
*/
void resetVertexArray();
/** @brief Sets an array of vertex colors.
@param color array with vertex colors, can be both host and device memory.
*/
void setColorArray(InputArray color);
/** @brief Resets vertex colors.
*/
void resetColorArray();
/** @brief Sets an array of vertex normals.
@param normal array with vertex normals, can be both host and device memory.
*/
void setNormalArray(InputArray normal);
/** @brief Resets vertex normals.
*/
void resetNormalArray();
/** @brief Sets an array of vertex texture coordinates.
@param texCoord array with vertex texture coordinates, can be both host and device memory.
*/
void setTexCoordArray(InputArray texCoord);
/** @brief Resets vertex texture coordinates.
*/
void resetTexCoordArray();
/** @brief Releases all inner buffers.
*/
void release();
/** @brief Sets auto release mode all inner buffers.
@param flag Auto release mode.
*/
void setAutoRelease(bool flag);
/** @brief Binds all vertex arrays.
*/
void bind() const;
/** @brief Returns the vertex count.
*/
int size() const;
bool empty() const;
private:
int size_;
Buffer vertex_;
Buffer color_;
Buffer normal_;
Buffer texCoord_;
};
/////////////////// Render Functions ///////////////////
//! render mode
enum RenderModes {
POINTS = 0x0000,
LINES = 0x0001,
LINE_LOOP = 0x0002,
LINE_STRIP = 0x0003,
TRIANGLES = 0x0004,
TRIANGLE_STRIP = 0x0005,
TRIANGLE_FAN = 0x0006,
QUADS = 0x0007,
QUAD_STRIP = 0x0008,
POLYGON = 0x0009
};
/** @brief Render OpenGL texture or primitives.
@param tex Texture to draw.
@param wndRect Region of window, where to draw a texture (normalized coordinates).
@param texRect Region of texture to draw (normalized coordinates).
*/
CV_EXPORTS void render(const Texture2D& tex,
Rect_<double> wndRect = Rect_<double>(0.0, 0.0, 1.0, 1.0),
Rect_<double> texRect = Rect_<double>(0.0, 0.0, 1.0, 1.0));
/** @overload
@param arr Array of primitives' vertices.
@param mode Render mode. One of cv::ogl::RenderModes
@param color Color for all vertices. Will be used if arr doesn't contain color array.
*/
CV_EXPORTS void render(const Arrays& arr, int mode = POINTS, Scalar color = Scalar::all(255));
/** @overload
@param arr Array of primitives' vertices.
@param indices Array of vertex indices (host or device memory).
@param mode Render mode. One of cv::ogl::RenderModes
@param color Color for all vertices. Will be used if arr doesn't contain color array.
*/
CV_EXPORTS void render(const Arrays& arr, InputArray indices, int mode = POINTS, Scalar color = Scalar::all(255));
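// A minimal drawing sketch (typically run inside the draw callback of a
// WINDOW_OPENGL window; `tex` and `vertices` are illustrative):
//   cv::ogl::render(tex);  // stretch the whole texture over the whole window
//   cv::ogl::Arrays arr;
//   arr.setVertexArray(vertices);  // e.g. an Nx3 CV_32FC1 Mat of coordinates
//   cv::ogl::render(arr, cv::ogl::POINTS, cv::Scalar::all(255));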
/////////////////// CL-GL Interoperability Functions ///////////////////
namespace ocl {
using namespace cv::ocl;
// TODO static functions in the Context class
/** @brief Creates OpenCL context from GL.
@return Returns reference to OpenCL Context
*/
CV_EXPORTS Context& initializeContextFromGL();
} // namespace cv::ogl::ocl
/** @brief Converts InputArray to Texture2D object.
@param src - source InputArray.
@param texture - destination Texture2D object.
*/
CV_EXPORTS void convertToGLTexture2D(InputArray src, Texture2D& texture);
/** @brief Converts Texture2D object to OutputArray.
@param texture - source Texture2D object.
@param dst - destination OutputArray.
*/
CV_EXPORTS void convertFromGLTexture2D(const Texture2D& texture, OutputArray dst);
/** @brief Maps Buffer object to process on CL side (convert to UMat).
The function creates a CL buffer from the GL one, and then constructs a UMat that can be used
to process the buffer data with OpenCV functions. Note that in the current implementation a
UMat constructed this way doesn't own the corresponding GL buffer object, so it is
the user's responsibility to close down the CL/GL buffer relationship by explicitly
calling the unmapGLBuffer() function.
@param buffer - source Buffer object.
@param accessFlags - data access flags (ACCESS_READ|ACCESS_WRITE).
@return Returns UMat object
*/
CV_EXPORTS UMat mapGLBuffer(const Buffer& buffer, int accessFlags = ACCESS_READ|ACCESS_WRITE);
/** @brief Unmaps Buffer object (releases UMat, previously mapped from Buffer).
The function must be called explicitly by the user for each UMat previously constructed
by a call to the mapGLBuffer() function.
@param u - source UMat, created by mapGLBuffer().
*/
CV_EXPORTS void unmapGLBuffer(UMat& u);
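// A minimal CL-GL processing sketch (assumes a platform exposing
// cl_khr_gl_sharing; `buf` is illustrative):
//   cv::ogl::ocl::initializeContextFromGL();  // once, after the GL context exists
//   cv::UMat u = cv::ogl::mapGLBuffer(buf, cv::ACCESS_READ | cv::ACCESS_WRITE);
//   cv::GaussianBlur(u, u, cv::Size(5, 5), 1.5);  // any UMat-aware OpenCV call
//   cv::ogl::unmapGLBuffer(u);  // mandatory explicit unmap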
}} // namespace cv::ogl
namespace cv { namespace cuda {
//! @addtogroup cuda
//! @{
/** @brief Sets a CUDA device and initializes it for the current thread with OpenGL interoperability.
This function should be explicitly called after OpenGL context creation and before any CUDA calls.
@param device System index of a CUDA device starting with 0.
@ingroup core_opengl
*/
CV_EXPORTS void setGlDevice(int device = 0);
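// A minimal ordering sketch (device index illustrative):
//   cv::namedWindow("gl", cv::WINDOW_OPENGL);  // 1) create the GL context first
//   cv::cuda::setGlDevice(0);                  // 2) then bind the CUDA device
//   // 3) CUDA calls / ogl::Buffer::mapDevice() may follow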
//! @}
}}
//! @cond IGNORED
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
inline
cv::ogl::Buffer::Buffer(int arows, int acols, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(arows, acols, atype, target, autoRelease);
}
inline
cv::ogl::Buffer::Buffer(Size asize, int atype, Target target, bool autoRelease) : rows_(0), cols_(0), type_(0)
{
create(asize, atype, target, autoRelease);
}
inline
void cv::ogl::Buffer::create(Size asize, int atype, Target target, bool autoRelease)
{
create(asize.height, asize.width, atype, target, autoRelease);
}
inline
int cv::ogl::Buffer::rows() const
{
return rows_;
}
inline
int cv::ogl::Buffer::cols() const
{
return cols_;
}
inline
cv::Size cv::ogl::Buffer::size() const
{
return Size(cols_, rows_);
}
inline
bool cv::ogl::Buffer::empty() const
{
return rows_ == 0 || cols_ == 0;
}
inline
int cv::ogl::Buffer::type() const
{
return type_;
}
inline
int cv::ogl::Buffer::depth() const
{
return CV_MAT_DEPTH(type_);
}
inline
int cv::ogl::Buffer::channels() const
{
return CV_MAT_CN(type_);
}
inline
int cv::ogl::Buffer::elemSize() const
{
return CV_ELEM_SIZE(type_);
}
inline
int cv::ogl::Buffer::elemSize1() const
{
return CV_ELEM_SIZE1(type_);
}
///////
inline
cv::ogl::Texture2D::Texture2D(int arows, int acols, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(arows, acols, aformat, autoRelease);
}
inline
cv::ogl::Texture2D::Texture2D(Size asize, Format aformat, bool autoRelease) : rows_(0), cols_(0), format_(NONE)
{
create(asize, aformat, autoRelease);
}
inline
void cv::ogl::Texture2D::create(Size asize, Format aformat, bool autoRelease)
{
create(asize.height, asize.width, aformat, autoRelease);
}
inline
int cv::ogl::Texture2D::rows() const
{
return rows_;
}
inline
int cv::ogl::Texture2D::cols() const
{
return cols_;
}
inline
cv::Size cv::ogl::Texture2D::size() const
{
return Size(cols_, rows_);
}
inline
bool cv::ogl::Texture2D::empty() const
{
return rows_ == 0 || cols_ == 0;
}
inline
cv::ogl::Texture2D::Format cv::ogl::Texture2D::format() const
{
return format_;
}
///////
inline
cv::ogl::Arrays::Arrays() : size_(0)
{
}
inline
int cv::ogl::Arrays::size() const
{
return size_;
}
inline
bool cv::ogl::Arrays::empty() const
{
return size_ == 0;
}
//! @endcond
#endif /* OPENCV_CORE_OPENGL_HPP */

Some files were not shown because too many files have changed in this diff.