Add zlmediakit.
This commit is contained in:
250
3rdparty/libopencv/include/opencv2/core/hal/hal.hpp
vendored
Normal file
250
3rdparty/libopencv/include/opencv2/core/hal/hal.hpp
vendored
Normal file
@ -0,0 +1,250 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_HAL_HPP
|
||||
#define OPENCV_HAL_HPP
|
||||
|
||||
#include "opencv2/core/cvdef.h"
|
||||
#include "opencv2/core/cvstd.hpp"
|
||||
#include "opencv2/core/hal/interface.h"
|
||||
|
||||
namespace cv { namespace hal {
|
||||
|
||||
//! @addtogroup core_hal_functions
|
||||
//! @{
|
||||
|
||||
// normHamming overload set. Every variant takes raw byte buffer(s) plus an
// int length `n` and returns an int.
// NOTE(review): presumably a Hamming norm / distance (bit counting); the
// definitions are not part of this header - confirm against the implementation.
// Single-buffer form.
CV_EXPORTS int normHamming(const uchar* a, int n);
|
||||
// Two-buffer form; one length `n` is passed for both `a` and `b`.
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n);
|
||||
|
||||
// Single-buffer form with an extra `cellSize` argument.
// NOTE(review): the meaning and valid values of `cellSize` are not visible here - confirm.
CV_EXPORTS int normHamming(const uchar* a, int n, int cellSize);
|
||||
// Two-buffer form with the extra `cellSize` argument.
CV_EXPORTS int normHamming(const uchar* a, const uchar* b, int n, int cellSize);
|
||||
|
||||
CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||
CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||
CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||
CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||
CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
|
||||
CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
|
||||
CV_EXPORTS int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors);
|
||||
CV_EXPORTS int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors);
|
||||
|
||||
CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||
int m_a, int n_a, int n_d, int flags);
|
||||
CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||
int m_a, int n_a, int n_d, int flags);
|
||||
CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||
int m_a, int n_a, int n_d, int flags);
|
||||
CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||
int m_a, int n_a, int n_d, int flags);
|
||||
|
||||
CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
|
||||
CV_EXPORTS float normL1_(const float* a, const float* b, int n);
|
||||
CV_EXPORTS float normL2Sqr_(const float* a, const float* b, int n);
|
||||
|
||||
CV_EXPORTS void exp32f(const float* src, float* dst, int n);
|
||||
CV_EXPORTS void exp64f(const double* src, double* dst, int n);
|
||||
CV_EXPORTS void log32f(const float* src, float* dst, int n);
|
||||
CV_EXPORTS void log64f(const double* src, double* dst, int n);
|
||||
|
||||
CV_EXPORTS void fastAtan32f(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
|
||||
CV_EXPORTS void fastAtan64f(const double* y, const double* x, double* dst, int n, bool angleInDegrees);
|
||||
CV_EXPORTS void magnitude32f(const float* x, const float* y, float* dst, int n);
|
||||
CV_EXPORTS void magnitude64f(const double* x, const double* y, double* dst, int n);
|
||||
CV_EXPORTS void sqrt32f(const float* src, float* dst, int len);
|
||||
CV_EXPORTS void sqrt64f(const double* src, double* dst, int len);
|
||||
CV_EXPORTS void invSqrt32f(const float* src, float* dst, int len);
|
||||
CV_EXPORTS void invSqrt64f(const double* src, double* dst, int len);
|
||||
|
||||
CV_EXPORTS void split8u(const uchar* src, uchar** dst, int len, int cn );
|
||||
CV_EXPORTS void split16u(const ushort* src, ushort** dst, int len, int cn );
|
||||
CV_EXPORTS void split32s(const int* src, int** dst, int len, int cn );
|
||||
CV_EXPORTS void split64s(const int64* src, int64** dst, int len, int cn );
|
||||
|
||||
CV_EXPORTS void merge8u(const uchar** src, uchar* dst, int len, int cn );
|
||||
CV_EXPORTS void merge16u(const ushort** src, ushort* dst, int len, int cn );
|
||||
CV_EXPORTS void merge32s(const int** src, int* dst, int len, int cn );
|
||||
CV_EXPORTS void merge64s(const int64** src, int64* dst, int len, int cn );
|
||||
|
||||
CV_EXPORTS void add8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void add8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void add16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void add16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void add32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void add32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void add64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||
|
||||
CV_EXPORTS void sub8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void sub8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void sub16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void sub16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void sub32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void sub32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void sub64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||
|
||||
CV_EXPORTS void max8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void max8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void max16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void max16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void max32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void max32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void max64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||
|
||||
CV_EXPORTS void min8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void min8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void min16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void min16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void min32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void min32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void min64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||
|
||||
CV_EXPORTS void absdiff8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void absdiff8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void absdiff16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void absdiff16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void absdiff32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void absdiff32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void absdiff64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* );
|
||||
|
||||
CV_EXPORTS void and8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void or8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void xor8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||
CV_EXPORTS void not8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* );
|
||||
|
||||
CV_EXPORTS void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||
CV_EXPORTS void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||
CV_EXPORTS void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||
CV_EXPORTS void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||
CV_EXPORTS void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||
CV_EXPORTS void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||
CV_EXPORTS void cmp64f(const double* src1, size_t step1, const double* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _cmpop);
|
||||
|
||||
CV_EXPORTS void mul8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void mul8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void mul16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void mul16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void mul32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void mul32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void mul64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
|
||||
|
||||
CV_EXPORTS void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void div16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void div16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
|
||||
|
||||
CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale);
|
||||
CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale);
|
||||
|
||||
CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars );
|
||||
CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars );
|
||||
CV_EXPORTS void addWeighted16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scalars );
|
||||
CV_EXPORTS void addWeighted16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scalars );
|
||||
CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scalars );
|
||||
CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars );
|
||||
CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars );
|
||||
|
||||
// Abstract, exported interface: apply() is pure virtual, so instances can only
// be obtained through the static create() factory, which returns a Ptr<DFT1D>.
// Member order is left untouched on purpose - the type is exported and polymorphic.
struct CV_EXPORTS DFT1D
|
||||
{
|
||||
// Factory. `useBuffer` is an optional pointer argument (defaults to 0, i.e. null).
// NOTE(review): `flags` presumably takes the CV_HAL_DFT_* values from
// hal/interface.h and `depth` a CV_32F/CV_64F-style depth code - confirm.
static Ptr<DFT1D> create(int len, int count, int depth, int flags, bool * useBuffer = 0);
|
||||
// Runs the transform from `src` into `dst`; both are passed as raw byte pointers.
virtual void apply(const uchar *src, uchar *dst) = 0;
|
||||
// Virtual so that implementations can be destroyed through a DFT1D pointer.
virtual ~DFT1D() {}
|
||||
};
|
||||
|
||||
// Abstract, exported interface: apply() is pure virtual, so instances can only
// be obtained through the static create() factory, which returns a Ptr<DFT2D>.
// Member order is left untouched on purpose - the type is exported and polymorphic.
struct CV_EXPORTS DFT2D
|
||||
{
|
||||
// Factory. `nonzero_rows` is optional and defaults to 0.
// NOTE(review): `flags` presumably takes the CV_HAL_DFT_* values from
// hal/interface.h - confirm against the implementation.
static Ptr<DFT2D> create(int width, int height, int depth,
|
||||
int src_channels, int dst_channels,
|
||||
int flags, int nonzero_rows = 0);
|
||||
// Runs the transform; source and destination are raw byte pointers, each
// accompanied by its own step value.
// NOTE(review): steps are presumably row strides in bytes - confirm.
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
|
||||
// Virtual so that implementations can be destroyed through a DFT2D pointer.
virtual ~DFT2D() {}
|
||||
};
|
||||
|
||||
// Abstract, exported interface: apply() is pure virtual, so instances can only
// be obtained through the static create() factory, which returns a Ptr<DCT2D>.
// Member order is left untouched on purpose - the type is exported and polymorphic.
struct CV_EXPORTS DCT2D
|
||||
{
|
||||
// Factory taking the 2D size, a depth code and flags.
// NOTE(review): the accepted `flags` values are not visible in this header - confirm.
static Ptr<DCT2D> create(int width, int height, int depth, int flags);
|
||||
// Runs the transform; source and destination are raw byte pointers, each
// accompanied by its own step value.
// NOTE(review): steps are presumably row strides in bytes - confirm.
virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0;
|
||||
// Virtual so that implementations can be destroyed through a DCT2D pointer.
virtual ~DCT2D() {}
|
||||
};
|
||||
|
||||
//! @} core_hal
|
||||
|
||||
//=============================================================================
|
||||
// for binary compatibility with 3.0
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
// Overloaded, unsuffixed names kept for binary compatibility with 3.0 (see the
// banner comment above). Each one has the same parameter list as the
// type-suffixed declaration earlier in this header (LU32f/LU64f,
// Cholesky32f/Cholesky64f, exp32f/exp64f, ...).
CV_EXPORTS int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||
CV_EXPORTS int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||
CV_EXPORTS bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||
CV_EXPORTS bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||
|
||||
CV_EXPORTS void exp(const float* src, float* dst, int n);
|
||||
CV_EXPORTS void exp(const double* src, double* dst, int n);
|
||||
CV_EXPORTS void log(const float* src, float* dst, int n);
|
||||
CV_EXPORTS void log(const double* src, double* dst, int n);
|
||||
|
||||
// Only a float overload is declared here; its parameter list matches fastAtan32f above.
CV_EXPORTS void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees);
|
||||
CV_EXPORTS void magnitude(const float* x, const float* y, float* dst, int n);
|
||||
CV_EXPORTS void magnitude(const double* x, const double* y, double* dst, int n);
|
||||
CV_EXPORTS void sqrt(const float* src, float* dst, int len);
|
||||
CV_EXPORTS void sqrt(const double* src, double* dst, int len);
|
||||
CV_EXPORTS void invSqrt(const float* src, float* dst, int len);
|
||||
CV_EXPORTS void invSqrt(const double* src, double* dst, int len);
|
||||
|
||||
//! @endcond
|
||||
|
||||
}} //cv::hal
|
||||
|
||||
#endif //OPENCV_HAL_HPP
|
182
3rdparty/libopencv/include/opencv2/core/hal/interface.h
vendored
Normal file
182
3rdparty/libopencv/include/opencv2/core/hal/interface.h
vendored
Normal file
@ -0,0 +1,182 @@
|
||||
#ifndef OPENCV_CORE_HAL_INTERFACE_H
|
||||
#define OPENCV_CORE_HAL_INTERFACE_H
|
||||
|
||||
//! @addtogroup core_hal_interface
|
||||
//! @{
|
||||
|
||||
//! @name Return codes
|
||||
//! @{
|
||||
#define CV_HAL_ERROR_OK 0
|
||||
#define CV_HAL_ERROR_NOT_IMPLEMENTED 1
|
||||
#define CV_HAL_ERROR_UNKNOWN -1
|
||||
//! @}
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include <cstddef>
|
||||
#else
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#endif
|
||||
|
||||
//! @name Data types
|
||||
//! primitive types
|
||||
//! - schar - signed 1 byte integer
|
||||
//! - uchar - unsigned 1 byte integer
|
||||
//! - short - signed 2 byte integer
|
||||
//! - ushort - unsigned 2 byte integer
|
||||
//! - int - signed 4 byte integer
|
||||
//! - uint - unsigned 4 byte integer
|
||||
//! - int64 - signed 8 byte integer
|
||||
//! - uint64 - unsigned 8 byte integer
|
||||
//! @{
|
||||
// Selects the definition of `uint` and, as a side effect, which fixed-width
// integer header gets included for the non-MSVC/non-Borland compilers.
#if !defined _MSC_VER && !defined __BORLANDC__
|
||||
// C++11 or newer, except on Apple platforms: use <cstdint>.
# if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__
|
||||
# include <cstdint>
|
||||
// With newlib, `uint` is plain `unsigned int` rather than std::uint32_t.
# ifdef __NEWLIB__
|
||||
typedef unsigned int uint;
|
||||
# else
|
||||
typedef std::uint32_t uint;
|
||||
# endif
|
||||
// C, pre-C++11, or Apple: fall back to <stdint.h> and the global uint32_t.
# else
|
||||
# include <stdint.h>
|
||||
typedef uint32_t uint;
|
||||
# endif
|
||||
// MSVC / Borland: no fixed-width header is included by this block.
#else
|
||||
typedef unsigned uint;
|
||||
#endif
|
||||
|
||||
typedef signed char schar;
|
||||
|
||||
#ifndef __IPL_H__
|
||||
typedef unsigned char uchar;
|
||||
typedef unsigned short ushort;
|
||||
#endif
|
||||
|
||||
// 64-bit integer typedefs plus CV_BIG_INT / CV_BIG_UINT, which paste the
// compiler-appropriate 64-bit literal suffix onto `n`.
#if defined _MSC_VER || defined __BORLANDC__
|
||||
// MSVC / Borland: built-in __int64 types and the I64 / UI64 literal suffixes.
typedef __int64 int64;
|
||||
typedef unsigned __int64 uint64;
|
||||
# define CV_BIG_INT(n) n##I64
|
||||
# define CV_BIG_UINT(n) n##UI64
|
||||
#else
|
||||
// Other compilers: relies on the <cstdint> / <stdint.h> include made by the
// `uint` selection earlier in this header.
// NOTE(review): the names are used unqualified; when only <cstdint> was
// included, the C++ standard guarantees std::int64_t but merely permits the
// global-namespace name - works on common toolchains, confirm for exotic ones.
typedef int64_t int64;
|
||||
typedef uint64_t uint64;
|
||||
# define CV_BIG_INT(n) n##LL
|
||||
# define CV_BIG_UINT(n) n##ULL
|
||||
#endif
|
||||
|
||||
#define CV_CN_MAX 512
|
||||
#define CV_CN_SHIFT 3
|
||||
#define CV_DEPTH_MAX (1 << CV_CN_SHIFT)
|
||||
|
||||
#define CV_8U 0
|
||||
#define CV_8S 1
|
||||
#define CV_16U 2
|
||||
#define CV_16S 3
|
||||
#define CV_32S 4
|
||||
#define CV_32F 5
|
||||
#define CV_64F 6
|
||||
#define CV_USRTYPE1 7
|
||||
|
||||
#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1)
|
||||
#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK)
|
||||
|
||||
// Packs a depth code and a channel count into a single type value:
// `depth` is masked with CV_MAT_DEPTH_MASK (low CV_CN_SHIFT bits) and
// (cn - 1) is stored in the bits above CV_CN_SHIFT.
// Example: CV_MAKETYPE(CV_8U, 3) == 0 + (2 << 3) == 16.
#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT))
|
||||
#define CV_MAKE_TYPE CV_MAKETYPE
|
||||
|
||||
#define CV_8UC1 CV_MAKETYPE(CV_8U,1)
|
||||
#define CV_8UC2 CV_MAKETYPE(CV_8U,2)
|
||||
#define CV_8UC3 CV_MAKETYPE(CV_8U,3)
|
||||
#define CV_8UC4 CV_MAKETYPE(CV_8U,4)
|
||||
#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n))
|
||||
|
||||
#define CV_8SC1 CV_MAKETYPE(CV_8S,1)
|
||||
#define CV_8SC2 CV_MAKETYPE(CV_8S,2)
|
||||
#define CV_8SC3 CV_MAKETYPE(CV_8S,3)
|
||||
#define CV_8SC4 CV_MAKETYPE(CV_8S,4)
|
||||
#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n))
|
||||
|
||||
#define CV_16UC1 CV_MAKETYPE(CV_16U,1)
|
||||
#define CV_16UC2 CV_MAKETYPE(CV_16U,2)
|
||||
#define CV_16UC3 CV_MAKETYPE(CV_16U,3)
|
||||
#define CV_16UC4 CV_MAKETYPE(CV_16U,4)
|
||||
#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n))
|
||||
|
||||
#define CV_16SC1 CV_MAKETYPE(CV_16S,1)
|
||||
#define CV_16SC2 CV_MAKETYPE(CV_16S,2)
|
||||
#define CV_16SC3 CV_MAKETYPE(CV_16S,3)
|
||||
#define CV_16SC4 CV_MAKETYPE(CV_16S,4)
|
||||
#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n))
|
||||
|
||||
#define CV_32SC1 CV_MAKETYPE(CV_32S,1)
|
||||
#define CV_32SC2 CV_MAKETYPE(CV_32S,2)
|
||||
#define CV_32SC3 CV_MAKETYPE(CV_32S,3)
|
||||
#define CV_32SC4 CV_MAKETYPE(CV_32S,4)
|
||||
#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n))
|
||||
|
||||
#define CV_32FC1 CV_MAKETYPE(CV_32F,1)
|
||||
#define CV_32FC2 CV_MAKETYPE(CV_32F,2)
|
||||
#define CV_32FC3 CV_MAKETYPE(CV_32F,3)
|
||||
#define CV_32FC4 CV_MAKETYPE(CV_32F,4)
|
||||
#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n))
|
||||
|
||||
#define CV_64FC1 CV_MAKETYPE(CV_64F,1)
|
||||
#define CV_64FC2 CV_MAKETYPE(CV_64F,2)
|
||||
#define CV_64FC3 CV_MAKETYPE(CV_64F,3)
|
||||
#define CV_64FC4 CV_MAKETYPE(CV_64F,4)
|
||||
#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n))
|
||||
//! @}
|
||||
|
||||
//! @name Comparison operation
|
||||
//! @sa cv::CmpTypes
|
||||
//! @{
|
||||
#define CV_HAL_CMP_EQ 0
|
||||
#define CV_HAL_CMP_GT 1
|
||||
#define CV_HAL_CMP_GE 2
|
||||
#define CV_HAL_CMP_LT 3
|
||||
#define CV_HAL_CMP_LE 4
|
||||
#define CV_HAL_CMP_NE 5
|
||||
//! @}
|
||||
|
||||
//! @name Border processing modes
|
||||
//! @sa cv::BorderTypes
|
||||
//! @{
|
||||
#define CV_HAL_BORDER_CONSTANT 0
|
||||
#define CV_HAL_BORDER_REPLICATE 1
|
||||
#define CV_HAL_BORDER_REFLECT 2
|
||||
#define CV_HAL_BORDER_WRAP 3
|
||||
#define CV_HAL_BORDER_REFLECT_101 4
|
||||
#define CV_HAL_BORDER_TRANSPARENT 5
|
||||
#define CV_HAL_BORDER_ISOLATED 16
|
||||
//! @}
|
||||
|
||||
//! @name DFT flags
|
||||
//! @{
|
||||
#define CV_HAL_DFT_INVERSE 1
|
||||
#define CV_HAL_DFT_SCALE 2
|
||||
#define CV_HAL_DFT_ROWS 4
|
||||
#define CV_HAL_DFT_COMPLEX_OUTPUT 16
|
||||
#define CV_HAL_DFT_REAL_OUTPUT 32
|
||||
#define CV_HAL_DFT_TWO_STAGE 64
|
||||
#define CV_HAL_DFT_STAGE_COLS 128
|
||||
#define CV_HAL_DFT_IS_CONTINUOUS 512
|
||||
#define CV_HAL_DFT_IS_INPLACE 1024
|
||||
//! @}
|
||||
|
||||
//! @name SVD flags
|
||||
//! @{
|
||||
#define CV_HAL_SVD_NO_UV 1
|
||||
#define CV_HAL_SVD_SHORT_UV 2
|
||||
#define CV_HAL_SVD_MODIFY_A 4
|
||||
#define CV_HAL_SVD_FULL_UV 8
|
||||
//! @}
|
||||
|
||||
//! @name Gemm flags
|
||||
//! @{
|
||||
#define CV_HAL_GEMM_1_T 1
|
||||
#define CV_HAL_GEMM_2_T 2
|
||||
#define CV_HAL_GEMM_3_T 4
|
||||
//! @}
|
||||
|
||||
//! @}
|
||||
|
||||
#endif
|
472
3rdparty/libopencv/include/opencv2/core/hal/intrin.hpp
vendored
Normal file
472
3rdparty/libopencv/include/opencv2/core/hal/intrin.hpp
vendored
Normal file
@ -0,0 +1,472 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_HAL_INTRIN_HPP
|
||||
#define OPENCV_HAL_INTRIN_HPP
|
||||
|
||||
#include <cmath>
|
||||
#include <float.h>
|
||||
#include <stdlib.h>
|
||||
#include "opencv2/core/cvdef.h"
|
||||
|
||||
#define OPENCV_HAL_ADD(a, b) ((a) + (b))
|
||||
#define OPENCV_HAL_AND(a, b) ((a) & (b))
|
||||
#define OPENCV_HAL_NOP(a) (a)
|
||||
#define OPENCV_HAL_1ST(a, b) (a)
|
||||
|
||||
// Unlike the HAL API, which lives in cv::hal,
|
||||
// the intrinsics are placed in the cv namespace so that
|
||||
// they are easier to use from within OpenCV code.
|
||||
namespace cv {
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
|
||||
#ifdef CV_CPU_DISPATCH_MODE
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
|
||||
#else
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
|
||||
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
|
||||
#endif
|
||||
|
||||
|
||||
// Open and immediately close the namespace so that it is declared (still
// empty) before the using-directive below refers to it.
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
// Make everything later added to that namespace visible from the enclosing
// cv namespace.
using namespace CV_CPU_OPTIMIZATION_HAL_NAMESPACE;
|
||||
// Re-open the namespace for the declarations that follow; the matching
// CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END is not visible in this part of the file.
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
#endif
|
||||
|
||||
//! @addtogroup core_hal_intrin
|
||||
//! @{
|
||||
|
||||
//! @cond IGNORED
|
||||
// Fallback traits used for element types that have no dedicated
// specialization: every associated type is _Tp itself, delta/shift are 0 and
// the reinterpret helpers are identity conversions.
template<typename _Tp> struct V_TypeTraits
{
    // Provided for parity with the specializations, which all expose value_type.
    typedef _Tp value_type;
    typedef _Tp int_type;
    typedef _Tp uint_type;
    typedef _Tp abs_type;
    typedef _Tp sum_type;

    enum { delta = 0, shift = 0 };

    // Returns x unchanged (int_type is _Tp).
    static int_type reinterpret_int(_Tp x) { return x; }

    // Returns x unchanged (uint_type is _Tp). Spelled like the helper of the
    // same purpose in every specialization, so generic code can call it on any
    // V_TypeTraits<T>.
    static uint_type reinterpret_uint(_Tp x) { return x; }

    // Historical misspelling of reinterpret_uint; kept as an alias so existing
    // callers of the old name keep compiling.
    static uint_type reinterpet_uint(_Tp x) { return reinterpret_uint(x); }

    // Converts an int_type value back to _Tp (a no-op cast here).
    static _Tp reinterpret_from_int(int_type x) { return (_Tp)x; }
};
|
||||
|
||||
template<> struct V_TypeTraits<uchar>
|
||||
{
|
||||
typedef uchar value_type;
|
||||
typedef schar int_type;
|
||||
typedef uchar uint_type;
|
||||
typedef uchar abs_type;
|
||||
typedef int sum_type;
|
||||
|
||||
typedef ushort w_type;
|
||||
typedef unsigned q_type;
|
||||
|
||||
enum { delta = 128, shift = 8 };
|
||||
|
||||
static int_type reinterpret_int(value_type x) { return (int_type)x; }
|
||||
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
|
||||
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
|
||||
};
|
||||
|
||||
template<> struct V_TypeTraits<schar>
|
||||
{
|
||||
typedef schar value_type;
|
||||
typedef schar int_type;
|
||||
typedef uchar uint_type;
|
||||
typedef uchar abs_type;
|
||||
typedef int sum_type;
|
||||
|
||||
typedef short w_type;
|
||||
typedef int q_type;
|
||||
|
||||
enum { delta = 128, shift = 8 };
|
||||
|
||||
static int_type reinterpret_int(value_type x) { return (int_type)x; }
|
||||
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
|
||||
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
|
||||
};
|
||||
|
||||
template<> struct V_TypeTraits<ushort>
|
||||
{
|
||||
typedef ushort value_type;
|
||||
typedef short int_type;
|
||||
typedef ushort uint_type;
|
||||
typedef ushort abs_type;
|
||||
typedef int sum_type;
|
||||
|
||||
typedef unsigned w_type;
|
||||
typedef uchar nu_type;
|
||||
|
||||
enum { delta = 32768, shift = 16 };
|
||||
|
||||
static int_type reinterpret_int(value_type x) { return (int_type)x; }
|
||||
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
|
||||
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
|
||||
};
|
||||
|
||||
template<> struct V_TypeTraits<short>
|
||||
{
|
||||
typedef short value_type;
|
||||
typedef short int_type;
|
||||
typedef ushort uint_type;
|
||||
typedef ushort abs_type;
|
||||
typedef int sum_type;
|
||||
|
||||
typedef int w_type;
|
||||
typedef uchar nu_type;
|
||||
typedef schar n_type;
|
||||
|
||||
enum { delta = 128, shift = 8 };
|
||||
|
||||
static int_type reinterpret_int(value_type x) { return (int_type)x; }
|
||||
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
|
||||
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
|
||||
};
|
||||
|
||||
template<> struct V_TypeTraits<unsigned>
|
||||
{
|
||||
typedef unsigned value_type;
|
||||
typedef int int_type;
|
||||
typedef unsigned uint_type;
|
||||
typedef unsigned abs_type;
|
||||
typedef unsigned sum_type;
|
||||
|
||||
typedef uint64 w_type;
|
||||
typedef ushort nu_type;
|
||||
|
||||
static int_type reinterpret_int(value_type x) { return (int_type)x; }
|
||||
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
|
||||
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
|
||||
};
|
||||
|
||||
template<> struct V_TypeTraits<int>
|
||||
{
|
||||
typedef int value_type;
|
||||
typedef int int_type;
|
||||
typedef unsigned uint_type;
|
||||
typedef unsigned abs_type;
|
||||
typedef int sum_type;
|
||||
|
||||
typedef int64 w_type;
|
||||
typedef short n_type;
|
||||
typedef ushort nu_type;
|
||||
|
||||
static int_type reinterpret_int(value_type x) { return (int_type)x; }
|
||||
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
|
||||
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
|
||||
};
|
||||
|
||||
template<> struct V_TypeTraits<uint64>
|
||||
{
|
||||
typedef uint64 value_type;
|
||||
typedef int64 int_type;
|
||||
typedef uint64 uint_type;
|
||||
typedef uint64 abs_type;
|
||||
typedef uint64 sum_type;
|
||||
|
||||
typedef unsigned nu_type;
|
||||
|
||||
static int_type reinterpret_int(value_type x) { return (int_type)x; }
|
||||
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
|
||||
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
|
||||
};
|
||||
|
||||
template<> struct V_TypeTraits<int64>
|
||||
{
|
||||
typedef int64 value_type;
|
||||
typedef int64 int_type;
|
||||
typedef uint64 uint_type;
|
||||
typedef uint64 abs_type;
|
||||
typedef int64 sum_type;
|
||||
|
||||
typedef int nu_type;
|
||||
|
||||
static int_type reinterpret_int(value_type x) { return (int_type)x; }
|
||||
static uint_type reinterpret_uint(value_type x) { return (uint_type)x; }
|
||||
static value_type reinterpret_from_int(int_type x) { return (value_type)x; }
|
||||
};
|
||||
|
||||
|
||||
template<> struct V_TypeTraits<float>
|
||||
{
|
||||
typedef float value_type;
|
||||
typedef int int_type;
|
||||
typedef unsigned uint_type;
|
||||
typedef float abs_type;
|
||||
typedef float sum_type;
|
||||
|
||||
typedef double w_type;
|
||||
|
||||
static int_type reinterpret_int(value_type x)
|
||||
{
|
||||
Cv32suf u;
|
||||
u.f = x;
|
||||
return u.i;
|
||||
}
|
||||
static uint_type reinterpet_uint(value_type x)
|
||||
{
|
||||
Cv32suf u;
|
||||
u.f = x;
|
||||
return u.u;
|
||||
}
|
||||
static value_type reinterpret_from_int(int_type x)
|
||||
{
|
||||
Cv32suf u;
|
||||
u.i = x;
|
||||
return u.f;
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct V_TypeTraits<double>
|
||||
{
|
||||
typedef double value_type;
|
||||
typedef int64 int_type;
|
||||
typedef uint64 uint_type;
|
||||
typedef double abs_type;
|
||||
typedef double sum_type;
|
||||
static int_type reinterpret_int(value_type x)
|
||||
{
|
||||
Cv64suf u;
|
||||
u.f = x;
|
||||
return u.i;
|
||||
}
|
||||
static uint_type reinterpet_uint(value_type x)
|
||||
{
|
||||
Cv64suf u;
|
||||
u.f = x;
|
||||
return u.u;
|
||||
}
|
||||
static value_type reinterpret_from_int(int_type x)
|
||||
{
|
||||
Cv64suf u;
|
||||
u.i = x;
|
||||
return u.f;
|
||||
}
|
||||
};
|
||||
|
||||
//! nlanes is the number of _Tp elements that fit in 16 bytes (one 128-bit register).
template <typename _Tp> struct V_SIMD128Traits
{
    enum { nlanes = 16 / sizeof(_Tp) };
};
|
||||
|
||||
//! @endcond
|
||||
|
||||
//! @}
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CV_DOXYGEN
|
||||
# undef CV_SSE2
|
||||
# undef CV_NEON
|
||||
# undef CV_VSX
|
||||
#endif
|
||||
|
||||
#if CV_SSE2
|
||||
|
||||
#include "opencv2/core/hal/intrin_sse.hpp"
|
||||
|
||||
#elif CV_NEON
|
||||
|
||||
#include "opencv2/core/hal/intrin_neon.hpp"
|
||||
|
||||
#elif CV_VSX
|
||||
|
||||
#include "opencv2/core/hal/intrin_vsx.hpp"
|
||||
|
||||
#else
|
||||
|
||||
#include "opencv2/core/hal/intrin_cpp.hpp"
|
||||
|
||||
#endif
|
||||
|
||||
//! @addtogroup core_hal_intrin
|
||||
//! @{
|
||||
|
||||
#ifndef CV_SIMD128
|
||||
//! Set to 1 if the current compiler supports vector extensions (i.e. SSE2, NEON or VSX is enabled)
|
||||
#define CV_SIMD128 0
|
||||
#endif
|
||||
|
||||
#ifndef CV_SIMD128_64F
|
||||
//! Set to 1 if current intrinsics implementation supports 64-bit float vectors
|
||||
#define CV_SIMD128_64F 0
|
||||
#endif
|
||||
|
||||
//! @}
|
||||
|
||||
//==================================================================================================
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
namespace cv {
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
#endif
|
||||
|
||||
template <typename R> struct V_RegTrait128;
|
||||
|
||||
template <> struct V_RegTrait128<uchar> {
|
||||
typedef v_uint8x16 reg;
|
||||
typedef v_uint16x8 w_reg;
|
||||
typedef v_uint32x4 q_reg;
|
||||
typedef v_uint8x16 u_reg;
|
||||
static v_uint8x16 zero() { return v_setzero_u8(); }
|
||||
static v_uint8x16 all(uchar val) { return v_setall_u8(val); }
|
||||
};
|
||||
|
||||
template <> struct V_RegTrait128<schar> {
|
||||
typedef v_int8x16 reg;
|
||||
typedef v_int16x8 w_reg;
|
||||
typedef v_int32x4 q_reg;
|
||||
typedef v_uint8x16 u_reg;
|
||||
static v_int8x16 zero() { return v_setzero_s8(); }
|
||||
static v_int8x16 all(schar val) { return v_setall_s8(val); }
|
||||
};
|
||||
|
||||
template <> struct V_RegTrait128<ushort> {
|
||||
typedef v_uint16x8 reg;
|
||||
typedef v_uint32x4 w_reg;
|
||||
typedef v_int16x8 int_reg;
|
||||
typedef v_uint16x8 u_reg;
|
||||
static v_uint16x8 zero() { return v_setzero_u16(); }
|
||||
static v_uint16x8 all(ushort val) { return v_setall_u16(val); }
|
||||
};
|
||||
|
||||
template <> struct V_RegTrait128<short> {
|
||||
typedef v_int16x8 reg;
|
||||
typedef v_int32x4 w_reg;
|
||||
typedef v_uint16x8 u_reg;
|
||||
static v_int16x8 zero() { return v_setzero_s16(); }
|
||||
static v_int16x8 all(short val) { return v_setall_s16(val); }
|
||||
};
|
||||
|
||||
template <> struct V_RegTrait128<unsigned> {
|
||||
typedef v_uint32x4 reg;
|
||||
typedef v_uint64x2 w_reg;
|
||||
typedef v_int32x4 int_reg;
|
||||
typedef v_uint32x4 u_reg;
|
||||
static v_uint32x4 zero() { return v_setzero_u32(); }
|
||||
static v_uint32x4 all(unsigned val) { return v_setall_u32(val); }
|
||||
};
|
||||
|
||||
template <> struct V_RegTrait128<int> {
|
||||
typedef v_int32x4 reg;
|
||||
typedef v_int64x2 w_reg;
|
||||
typedef v_uint32x4 u_reg;
|
||||
static v_int32x4 zero() { return v_setzero_s32(); }
|
||||
static v_int32x4 all(int val) { return v_setall_s32(val); }
|
||||
};
|
||||
|
||||
template <> struct V_RegTrait128<uint64> {
|
||||
typedef v_uint64x2 reg;
|
||||
static v_uint64x2 zero() { return v_setzero_u64(); }
|
||||
static v_uint64x2 all(uint64 val) { return v_setall_u64(val); }
|
||||
};
|
||||
|
||||
template <> struct V_RegTrait128<int64> {
|
||||
typedef v_int64x2 reg;
|
||||
static v_int64x2 zero() { return v_setzero_s64(); }
|
||||
static v_int64x2 all(int64 val) { return v_setall_s64(val); }
|
||||
};
|
||||
|
||||
template <> struct V_RegTrait128<float> {
|
||||
typedef v_float32x4 reg;
|
||||
typedef v_int32x4 int_reg;
|
||||
typedef v_float32x4 u_reg;
|
||||
static v_float32x4 zero() { return v_setzero_f32(); }
|
||||
static v_float32x4 all(float val) { return v_setall_f32(val); }
|
||||
};
|
||||
|
||||
#if CV_SIMD128_64F
//! Register types associated with double lanes; only declared when CV_SIMD128_64F is non-zero.
template <> struct V_RegTrait128<double>
{
    typedef v_float64x2 reg;
    typedef v_int32x4 int_reg;
    typedef v_float64x2 u_reg;

    //! Result of v_setzero_f64().
    static reg zero()
    { return v_setzero_f64(); }

    //! Result of v_setall_f64(val).
    static reg all(double val)
    { return v_setall_f64(val); }
};
#endif
|
||||
|
||||
/**
 * Returns the zero-based index of the least significant set bit of @p value,
 * i.e. the number of trailing zero bits.
 *
 * @param value 32-bit input; callers should pass a non-zero value. The
 *              per-compiler primitives used below do not agree on the
 *              result for 0 (for instance GCC documents __builtin_ctz(0)
 *              as undefined), and no branch normalises it here.
 * @return bit index in the range [0, 31] for non-zero input.
 */
inline unsigned int trailingZeros32(unsigned int value) {
#if defined(_MSC_VER)
#if (_MSC_VER < 1700) || defined(_M_ARM) || defined(_M_ARM64)
    // _tzcnt_u32 is an x86 intrinsic, so ARM targets and old compilers use the bit scan.
    unsigned long index = 0;
    _BitScanForward(&index, value);
    return (unsigned int)index;
#else
    return _tzcnt_u32(value);
#endif
#elif defined(__GNUC__) || defined(__GNUG__) || defined(__clang__)
    // clang provides the same builtin as GCC; the former clang-only branch was not valid C++.
    return __builtin_ctz(value);
#elif defined(__ICC) || defined(__INTEL_COMPILER)
    return _bit_scan_forward(value);
#else
    // Portable fallback: isolate the lowest set bit, then map it to its
    // index with a de Bruijn multiplication and a 32-entry lookup table.
    static const int MultiplyDeBruijnBitPosition[32] = {
        0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
        31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    return MultiplyDeBruijnBitPosition[((uint32_t)((value & (0u - value)) * 0x077CB531U)) >> 27];
#endif
}
|
||||
|
||||
#ifndef CV_DOXYGEN
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
#endif
|
||||
|
||||
} // cv::
|
||||
|
||||
//! @endcond
|
||||
|
||||
#endif
|
1959
3rdparty/libopencv/include/opencv2/core/hal/intrin_cpp.hpp
vendored
Normal file
1959
3rdparty/libopencv/include/opencv2/core/hal/intrin_cpp.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1303
3rdparty/libopencv/include/opencv2/core/hal/intrin_neon.hpp
vendored
Normal file
1303
3rdparty/libopencv/include/opencv2/core/hal/intrin_neon.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1921
3rdparty/libopencv/include/opencv2/core/hal/intrin_sse.hpp
vendored
Normal file
1921
3rdparty/libopencv/include/opencv2/core/hal/intrin_sse.hpp
vendored
Normal file
File diff suppressed because it is too large
Load Diff
962
3rdparty/libopencv/include/opencv2/core/hal/intrin_vsx.hpp
vendored
Normal file
962
3rdparty/libopencv/include/opencv2/core/hal/intrin_vsx.hpp
vendored
Normal file
@ -0,0 +1,962 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef OPENCV_HAL_VSX_HPP
|
||||
#define OPENCV_HAL_VSX_HPP
|
||||
|
||||
#include <algorithm>
|
||||
#include "opencv2/core/utility.hpp"
|
||||
|
||||
#define CV_SIMD128 1
|
||||
#define CV_SIMD128_64F 1
|
||||
|
||||
/**
|
||||
* todo: supporting half precision for power9
|
||||
 * convert instructions xvcvhpsp, xvcvsphp
|
||||
**/
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
//! @cond IGNORED
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
|
||||
|
||||
///////// Types ////////////
|
||||
|
||||
struct v_uint8x16
|
||||
{
|
||||
typedef uchar lane_type;
|
||||
enum { nlanes = 16 };
|
||||
vec_uchar16 val;
|
||||
|
||||
explicit v_uint8x16(const vec_uchar16& v) : val(v)
|
||||
{}
|
||||
v_uint8x16() : val(vec_uchar16_z)
|
||||
{}
|
||||
v_uint8x16(vec_bchar16 v) : val(vec_uchar16_c(v))
|
||||
{}
|
||||
v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
|
||||
uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
|
||||
: val(vec_uchar16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
|
||||
{}
|
||||
uchar get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
struct v_int8x16
|
||||
{
|
||||
typedef schar lane_type;
|
||||
enum { nlanes = 16 };
|
||||
vec_char16 val;
|
||||
|
||||
explicit v_int8x16(const vec_char16& v) : val(v)
|
||||
{}
|
||||
v_int8x16() : val(vec_char16_z)
|
||||
{}
|
||||
v_int8x16(vec_bchar16 v) : val(vec_char16_c(v))
|
||||
{}
|
||||
v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
|
||||
schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
|
||||
: val(vec_char16_set(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15))
|
||||
{}
|
||||
schar get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
struct v_uint16x8
|
||||
{
|
||||
typedef ushort lane_type;
|
||||
enum { nlanes = 8 };
|
||||
vec_ushort8 val;
|
||||
|
||||
explicit v_uint16x8(const vec_ushort8& v) : val(v)
|
||||
{}
|
||||
v_uint16x8() : val(vec_ushort8_z)
|
||||
{}
|
||||
v_uint16x8(vec_bshort8 v) : val(vec_ushort8_c(v))
|
||||
{}
|
||||
v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
|
||||
: val(vec_ushort8_set(v0, v1, v2, v3, v4, v5, v6, v7))
|
||||
{}
|
||||
ushort get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
struct v_int16x8
|
||||
{
|
||||
typedef short lane_type;
|
||||
enum { nlanes = 8 };
|
||||
vec_short8 val;
|
||||
|
||||
explicit v_int16x8(const vec_short8& v) : val(v)
|
||||
{}
|
||||
v_int16x8() : val(vec_short8_z)
|
||||
{}
|
||||
v_int16x8(vec_bshort8 v) : val(vec_short8_c(v))
|
||||
{}
|
||||
v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
|
||||
: val(vec_short8_set(v0, v1, v2, v3, v4, v5, v6, v7))
|
||||
{}
|
||||
short get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
struct v_uint32x4
|
||||
{
|
||||
typedef unsigned lane_type;
|
||||
enum { nlanes = 4 };
|
||||
vec_uint4 val;
|
||||
|
||||
explicit v_uint32x4(const vec_uint4& v) : val(v)
|
||||
{}
|
||||
v_uint32x4() : val(vec_uint4_z)
|
||||
{}
|
||||
v_uint32x4(vec_bint4 v) : val(vec_uint4_c(v))
|
||||
{}
|
||||
v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) : val(vec_uint4_set(v0, v1, v2, v3))
|
||||
{}
|
||||
uint get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
struct v_int32x4
|
||||
{
|
||||
typedef int lane_type;
|
||||
enum { nlanes = 4 };
|
||||
vec_int4 val;
|
||||
|
||||
explicit v_int32x4(const vec_int4& v) : val(v)
|
||||
{}
|
||||
v_int32x4() : val(vec_int4_z)
|
||||
{}
|
||||
v_int32x4(vec_bint4 v) : val(vec_int4_c(v))
|
||||
{}
|
||||
v_int32x4(int v0, int v1, int v2, int v3) : val(vec_int4_set(v0, v1, v2, v3))
|
||||
{}
|
||||
int get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
struct v_float32x4
|
||||
{
|
||||
typedef float lane_type;
|
||||
enum { nlanes = 4 };
|
||||
vec_float4 val;
|
||||
|
||||
explicit v_float32x4(const vec_float4& v) : val(v)
|
||||
{}
|
||||
v_float32x4() : val(vec_float4_z)
|
||||
{}
|
||||
v_float32x4(vec_bint4 v) : val(vec_float4_c(v))
|
||||
{}
|
||||
v_float32x4(float v0, float v1, float v2, float v3) : val(vec_float4_set(v0, v1, v2, v3))
|
||||
{}
|
||||
float get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
struct v_uint64x2
|
||||
{
|
||||
typedef uint64 lane_type;
|
||||
enum { nlanes = 2 };
|
||||
vec_udword2 val;
|
||||
|
||||
explicit v_uint64x2(const vec_udword2& v) : val(v)
|
||||
{}
|
||||
v_uint64x2() : val(vec_udword2_z)
|
||||
{}
|
||||
v_uint64x2(vec_bdword2 v) : val(vec_udword2_c(v))
|
||||
{}
|
||||
v_uint64x2(uint64 v0, uint64 v1) : val(vec_udword2_set(v0, v1))
|
||||
{}
|
||||
uint64 get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
struct v_int64x2
|
||||
{
|
||||
typedef int64 lane_type;
|
||||
enum { nlanes = 2 };
|
||||
vec_dword2 val;
|
||||
|
||||
explicit v_int64x2(const vec_dword2& v) : val(v)
|
||||
{}
|
||||
v_int64x2() : val(vec_dword2_z)
|
||||
{}
|
||||
v_int64x2(vec_bdword2 v) : val(vec_dword2_c(v))
|
||||
{}
|
||||
v_int64x2(int64 v0, int64 v1) : val(vec_dword2_set(v0, v1))
|
||||
{}
|
||||
int64 get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
struct v_float64x2
|
||||
{
|
||||
typedef double lane_type;
|
||||
enum { nlanes = 2 };
|
||||
vec_double2 val;
|
||||
|
||||
explicit v_float64x2(const vec_double2& v) : val(v)
|
||||
{}
|
||||
v_float64x2() : val(vec_double2_z)
|
||||
{}
|
||||
v_float64x2(vec_bdword2 v) : val(vec_double2_c(v))
|
||||
{}
|
||||
v_float64x2(double v0, double v1) : val(vec_double2_set(v0, v1))
|
||||
{}
|
||||
double get0() const
|
||||
{ return vec_extract(val, 0); }
|
||||
};
|
||||
|
||||
//////////////// Load and store operations ///////////////
|
||||
|
||||
/*
|
||||
* clang-5 aborted during parse "vec_xxx_c" only if it's
|
||||
* inside a function template which is defined by preprocessor macro.
|
||||
*
|
||||
* if vec_xxx_c defined as C++ cast, clang-5 will pass it
|
||||
*/
|
||||
#define OPENCV_HAL_IMPL_VSX_INITVEC(_Tpvec, _Tp, suffix, cast) \
|
||||
inline _Tpvec v_setzero_##suffix() { return _Tpvec(); } \
|
||||
inline _Tpvec v_setall_##suffix(_Tp v) { return _Tpvec(vec_splats((_Tp)v));} \
|
||||
template<typename _Tpvec0> inline _Tpvec v_reinterpret_as_##suffix(const _Tpvec0 &a) \
|
||||
{ return _Tpvec((cast)a.val); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint8x16, uchar, u8, vec_uchar16)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_int8x16, schar, s8, vec_char16)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint16x8, ushort, u16, vec_ushort8)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_int16x8, short, s16, vec_short8)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint32x4, uint, u32, vec_uint4)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_int32x4, int, s32, vec_int4)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_uint64x2, uint64, u64, vec_udword2)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_int64x2, int64, s64, vec_dword2)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_float32x4, float, f32, vec_float4)
|
||||
OPENCV_HAL_IMPL_VSX_INITVEC(v_float64x2, double, f64, vec_double2)
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(_Tpvec, _Tp, ld_func, st_func) \
|
||||
inline _Tpvec v_load(const _Tp* ptr) \
|
||||
{ return _Tpvec(ld_func(0, ptr)); } \
|
||||
inline _Tpvec v_load_aligned(const _Tp* ptr) \
|
||||
{ return _Tpvec(ld_func(0, ptr)); } \
|
||||
inline _Tpvec v_load_low(const _Tp* ptr) \
|
||||
{ return _Tpvec(vec_ld_l8(ptr)); } \
|
||||
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
|
||||
{ return _Tpvec(vec_mergesqh(vec_ld_l8(ptr0), vec_ld_l8(ptr1))); } \
|
||||
inline void v_store(_Tp* ptr, const _Tpvec& a) \
|
||||
{ st_func(a.val, 0, ptr); } \
|
||||
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
|
||||
{ st_func(a.val, 0, ptr); } \
|
||||
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vec_st_l8(a.val, ptr); } \
|
||||
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
|
||||
{ vec_st_h8(a.val, ptr); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint8x16, uchar, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int8x16, schar, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint16x8, ushort, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int16x8, short, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint32x4, uint, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int32x4, int, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float32x4, float, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_float64x2, double, vsx_ld, vsx_st)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_uint64x2, uint64, vsx_ld2, vsx_st2)
|
||||
OPENCV_HAL_IMPL_VSX_LOADSTORE_INT_OP(v_int64x2, int64, vsx_ld2, vsx_st2)
|
||||
|
||||
//////////////// Value reordering ///////////////
|
||||
|
||||
/* de&interleave */
|
||||
#define OPENCV_HAL_IMPL_VSX_INTERLEAVE(_Tp, _Tpvec) \
|
||||
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b) \
|
||||
{ vec_ld_deinterleave(ptr, a.val, b.val);} \
|
||||
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, \
|
||||
_Tpvec& b, _Tpvec& c) \
|
||||
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val); } \
|
||||
inline void v_load_deinterleave(const _Tp* ptr, _Tpvec& a, _Tpvec& b, \
|
||||
_Tpvec& c, _Tpvec& d) \
|
||||
{ vec_ld_deinterleave(ptr, a.val, b.val, c.val, d.val); } \
|
||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b) \
|
||||
{ vec_st_interleave(a.val, b.val, ptr); } \
|
||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, \
|
||||
const _Tpvec& b, const _Tpvec& c) \
|
||||
{ vec_st_interleave(a.val, b.val, c.val, ptr); } \
|
||||
inline void v_store_interleave(_Tp* ptr, const _Tpvec& a, const _Tpvec& b, \
|
||||
const _Tpvec& c, const _Tpvec& d) \
|
||||
{ vec_st_interleave(a.val, b.val, c.val, d.val, ptr); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uchar, v_uint8x16)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(schar, v_int8x16)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(ushort, v_uint16x8)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(short, v_int16x8)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(uint, v_uint32x4)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(int, v_int32x4)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(float, v_float32x4)
|
||||
OPENCV_HAL_IMPL_VSX_INTERLEAVE(double, v_float64x2)
|
||||
|
||||
/* Expand */
|
||||
#define OPENCV_HAL_IMPL_VSX_EXPAND(_Tpvec, _Tpwvec, _Tp, fl, fh) \
|
||||
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
|
||||
{ \
|
||||
b0.val = fh(a.val); \
|
||||
b1.val = fl(a.val); \
|
||||
} \
|
||||
inline _Tpwvec v_load_expand(const _Tp* ptr) \
|
||||
{ return _Tpwvec(fh(vsx_ld(0, ptr))); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint8x16, v_uint16x8, uchar, vec_unpacklu, vec_unpackhu)
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_int8x16, v_int16x8, schar, vec_unpackl, vec_unpackh)
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint16x8, v_uint32x4, ushort, vec_unpacklu, vec_unpackhu)
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_int16x8, v_int32x4, short, vec_unpackl, vec_unpackh)
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_uint32x4, v_uint64x2, uint, vec_unpacklu, vec_unpackhu)
|
||||
OPENCV_HAL_IMPL_VSX_EXPAND(v_int32x4, v_int64x2, int, vec_unpackl, vec_unpackh)
|
||||
|
||||
inline v_uint32x4 v_load_expand_q(const uchar* ptr)
|
||||
{ return v_uint32x4(vec_ld_buw(ptr)); }
|
||||
|
||||
inline v_int32x4 v_load_expand_q(const schar* ptr)
|
||||
{ return v_int32x4(vec_ld_bsw(ptr)); }
|
||||
|
||||
/* pack */
|
||||
#define OPENCV_HAL_IMPL_VSX_PACK(_Tpvec, _Tp, _Tpwvec, _Tpvn, _Tpdel, sfnc, pkfnc, addfnc, pack) \
|
||||
inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
|
||||
{ \
|
||||
return _Tpvec(pkfnc(a.val, b.val)); \
|
||||
} \
|
||||
inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
|
||||
{ \
|
||||
vec_st_l8(pkfnc(a.val, a.val), ptr); \
|
||||
} \
|
||||
template<int n> \
|
||||
inline _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
|
||||
{ \
|
||||
const __vector _Tpvn vn = vec_splats((_Tpvn)n); \
|
||||
const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \
|
||||
return _Tpvec(pkfnc(sfnc(addfnc(a.val, delta), vn), sfnc(addfnc(b.val, delta), vn))); \
|
||||
} \
|
||||
template<int n> \
|
||||
inline void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
|
||||
{ \
|
||||
const __vector _Tpvn vn = vec_splats((_Tpvn)n); \
|
||||
const __vector _Tpdel delta = vec_splats((_Tpdel)((_Tpdel)1 << (n-1))); \
|
||||
vec_st_l8(pkfnc(sfnc(addfnc(a.val, delta), vn), delta), ptr); \
|
||||
}
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_uint16x8, unsigned short, unsigned short,
|
||||
vec_sr, vec_packs, vec_adds, pack)
|
||||
OPENCV_HAL_IMPL_VSX_PACK(v_int8x16, schar, v_int16x8, unsigned short, short,
|
||||
vec_sra, vec_packs, vec_adds, pack)
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_uint32x4, unsigned int, unsigned int,
|
||||
vec_sr, vec_packs, vec_add, pack)
|
||||
OPENCV_HAL_IMPL_VSX_PACK(v_int16x8, short, v_int32x4, unsigned int, int,
|
||||
vec_sra, vec_packs, vec_add, pack)
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_uint64x2, unsigned long long, unsigned long long,
|
||||
vec_sr, vec_pack, vec_add, pack)
|
||||
OPENCV_HAL_IMPL_VSX_PACK(v_int32x4, int, v_int64x2, unsigned long long, long long,
|
||||
vec_sra, vec_pack, vec_add, pack)
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_PACK(v_uint8x16, uchar, v_int16x8, unsigned short, short,
|
||||
vec_sra, vec_packsu, vec_adds, pack_u)
|
||||
OPENCV_HAL_IMPL_VSX_PACK(v_uint16x8, ushort, v_int32x4, unsigned int, int,
|
||||
vec_sra, vec_packsu, vec_add, pack_u)
|
||||
// Following variant is not implemented on other platforms:
|
||||
//OPENCV_HAL_IMPL_VSX_PACK(v_uint32x4, uint, v_int64x2, unsigned long long, long long,
|
||||
// vec_sra, vec_packsu, vec_add, pack_u)
|
||||
|
||||
/* Recombine */
|
||||
/**
 * Interleaves two registers: b0 receives vec_mergeh(a0, a1) and b1 receives
 * vec_mergel(a0, a1).
 *
 * The inputs are copied first so the call stays correct when an output
 * refers to the same object as an input (for example v_zip(x, y, x, y));
 * writing b0 directly would otherwise change the data used for b1.
 */
template <typename _Tpvec>
inline void v_zip(const _Tpvec& a0, const _Tpvec& a1, _Tpvec& b0, _Tpvec& b1)
{
    const _Tpvec in0 = a0;
    const _Tpvec in1 = a1;
    b0.val = vec_mergeh(in0.val, in1.val);
    b1.val = vec_mergel(in0.val, in1.val);
}
|
||||
|
||||
//! Returns vec_mergesql(a.val, b.val) wrapped in the register type.
template <typename _Tpvec>
inline _Tpvec v_combine_high(const _Tpvec& a, const _Tpvec& b)
{
    return _Tpvec(vec_mergesql(a.val, b.val));
}
|
||||
|
||||
//! Returns vec_mergesqh(a.val, b.val) wrapped in the register type.
template <typename _Tpvec>
inline _Tpvec v_combine_low(const _Tpvec& a, const _Tpvec& b)
{
    return _Tpvec(vec_mergesqh(a.val, b.val));
}
|
||||
|
||||
/**
 * Recombines the halves of two registers: c receives vec_mergesqh(a, b) and
 * d receives vec_mergesql(a, b).
 *
 * The inputs are copied first so the call stays correct when an output
 * refers to the same object as an input (for example v_recombine(x, y, x, y));
 * writing c directly would otherwise change the data used for d.
 */
template <typename _Tpvec>
inline void v_recombine(const _Tpvec& a, const _Tpvec& b, _Tpvec& c, _Tpvec& d)
{
    const _Tpvec in0 = a;
    const _Tpvec in1 = b;
    c.val = vec_mergesqh(in0.val, in1.val);
    d.val = vec_mergesql(in0.val, in1.val);
}
|
||||
|
||||
/* Extract */
|
||||
/**
 * Generic v_extract: s == 0 returns a unchanged; otherwise the result is
 * vec_sld(b, a, k) with k = (nlanes - s) * sizeof(lane_type) bytes, and a
 * default-constructed register is returned when k exceeds 15.
 */
template<int s, typename _Tpvec>
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b)
{
    const int lane_bytes = sizeof(typename _Tpvec::lane_type);
    const int lane_count = _Tpvec::nlanes;
    const unsigned int byte_shift = ((lane_bytes * lane_count) - (s * lane_bytes));

    if (s == 0)
        return _Tpvec(a.val);
    if (byte_shift > 15)
        return _Tpvec();

    // byte_shift is already at most 15 here; the mask is kept only so that
    // the xlc compiler accepts the argument.
    return _Tpvec(vec_sld(b.val, a.val, byte_shift & 15));
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_EXTRACT_2(_Tpvec) \
|
||||
template<int s> \
|
||||
inline _Tpvec v_extract(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
switch(s) { \
|
||||
case 0: return _Tpvec(a.val); \
|
||||
case 2: return _Tpvec(b.val); \
|
||||
case 1: return _Tpvec(vec_sldw(b.val, a.val, 2)); \
|
||||
default: return _Tpvec(); \
|
||||
} \
|
||||
}
|
||||
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_uint64x2)
|
||||
OPENCV_HAL_IMPL_VSX_EXTRACT_2(v_int64x2)
|
||||
|
||||
|
||||
////////// Arithmetic, bitwise and comparison operations /////////
|
||||
|
||||
/* Element-wise binary and unary operations */
|
||||
/** Arithmetics **/
|
||||
#define OPENCV_HAL_IMPL_VSX_BIN_OP(bin_op, _Tpvec, intrin) \
|
||||
inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(intrin(a.val, b.val)); } \
|
||||
inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
|
||||
{ a.val = intrin(a.val, b.val); return a; }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint8x16, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint8x16, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int8x16, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int8x16, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint16x8, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint16x8, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint16x8, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int16x8, vec_adds)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int16x8, vec_subs)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int16x8, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint32x4, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint32x4, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_uint32x4, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int32x4, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int32x4, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_int32x4, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float32x4, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float32x4, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float32x4, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float32x4, vec_div)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_float64x2, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_float64x2, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(*, v_float64x2, vec_mul)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(/, v_float64x2, vec_div)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_uint64x2, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_uint64x2, vec_sub)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(+, v_int64x2, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(-, v_int64x2, vec_sub)
|
||||
|
||||
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b, v_int32x4& c, v_int32x4& d)
|
||||
{
|
||||
c.val = vec_mul(vec_unpackh(a.val), vec_unpackh(b.val));
|
||||
d.val = vec_mul(vec_unpackl(a.val), vec_unpackl(b.val));
|
||||
}
|
||||
inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b, v_uint32x4& c, v_uint32x4& d)
|
||||
{
|
||||
c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
|
||||
d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
|
||||
}
|
||||
inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b, v_uint64x2& c, v_uint64x2& d)
|
||||
{
|
||||
c.val = vec_mul(vec_unpackhu(a.val), vec_unpackhu(b.val));
|
||||
d.val = vec_mul(vec_unpacklu(a.val), vec_unpacklu(b.val));
|
||||
}
|
||||
|
||||
/** Non-saturating arithmetics **/
|
||||
// Generates a generic two-operand function `fname`: it hands the raw vectors
// of both arguments to `vec_op` and wraps the result in the same vector type.
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC(fname, vec_op)           \
template<typename _Tpvec>                                     \
inline _Tpvec fname(const _Tpvec& lhs, const _Tpvec& rhs)     \
{                                                             \
    return _Tpvec(vec_op(lhs.val, rhs.val));                  \
}

// Add / subtract built on the plain (non-saturating) vec_add / vec_sub.
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_add_wrap, vec_add)
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_sub_wrap, vec_sub)
|
||||
|
||||
/** Bitwise shifts **/
|
||||
#define OPENCV_HAL_IMPL_VSX_SHIFT_OP(_Tpvec, shr, splfunc) \
|
||||
inline _Tpvec operator << (const _Tpvec& a, int imm) \
|
||||
{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \
|
||||
inline _Tpvec operator >> (const _Tpvec& a, int imm) \
|
||||
{ return _Tpvec(shr(a.val, splfunc(imm))); } \
|
||||
template<int imm> inline _Tpvec v_shl(const _Tpvec& a) \
|
||||
{ return _Tpvec(vec_sl(a.val, splfunc(imm))); } \
|
||||
template<int imm> inline _Tpvec v_shr(const _Tpvec& a) \
|
||||
{ return _Tpvec(shr(a.val, splfunc(imm))); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint8x16, vec_sr, vec_uchar16_sp)
|
||||
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint16x8, vec_sr, vec_ushort8_sp)
|
||||
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint32x4, vec_sr, vec_uint4_sp)
|
||||
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_uint64x2, vec_sr, vec_udword2_sp)
|
||||
// algebraic right shift
|
||||
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int8x16, vec_sra, vec_uchar16_sp)
|
||||
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int16x8, vec_sra, vec_ushort8_sp)
|
||||
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int32x4, vec_sra, vec_uint4_sp)
|
||||
OPENCV_HAL_IMPL_VSX_SHIFT_OP(v_int64x2, vec_sra, vec_udword2_sp)
|
||||
|
||||
/** Bitwise logic **/
|
||||
#define OPENCV_HAL_IMPL_VSX_LOGIC_OP(_Tpvec) \
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(&, _Tpvec, vec_and) \
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(|, _Tpvec, vec_or) \
|
||||
OPENCV_HAL_IMPL_VSX_BIN_OP(^, _Tpvec, vec_xor) \
|
||||
inline _Tpvec operator ~ (const _Tpvec& a) \
|
||||
{ return _Tpvec(vec_not(a.val)); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint8x16)
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int8x16)
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint16x8)
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int16x8)
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint32x4)
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int32x4)
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_uint64x2)
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_int64x2)
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float32x4)
|
||||
OPENCV_HAL_IMPL_VSX_LOGIC_OP(v_float64x2)
|
||||
|
||||
/** Bitwise select **/
|
||||
#define OPENCV_HAL_IMPL_VSX_SELECT(_Tpvec, cast) \
|
||||
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_sel(b.val, a.val, cast(mask.val))); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_SELECT(v_uint8x16, vec_bchar16_c)
|
||||
OPENCV_HAL_IMPL_VSX_SELECT(v_int8x16, vec_bchar16_c)
|
||||
OPENCV_HAL_IMPL_VSX_SELECT(v_uint16x8, vec_bshort8_c)
|
||||
OPENCV_HAL_IMPL_VSX_SELECT(v_int16x8, vec_bshort8_c)
|
||||
OPENCV_HAL_IMPL_VSX_SELECT(v_uint32x4, vec_bint4_c)
|
||||
OPENCV_HAL_IMPL_VSX_SELECT(v_int32x4, vec_bint4_c)
|
||||
OPENCV_HAL_IMPL_VSX_SELECT(v_float32x4, vec_bint4_c)
|
||||
OPENCV_HAL_IMPL_VSX_SELECT(v_float64x2, vec_bdword2_c)
|
||||
|
||||
/** Comparison **/
|
||||
#define OPENCV_HAL_IMPL_VSX_INT_CMP_OP(_Tpvec) \
|
||||
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_cmpeq(a.val, b.val)); } \
|
||||
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_cmpne(a.val, b.val)); } \
|
||||
inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_cmplt(a.val, b.val)); } \
|
||||
inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_cmpgt(a.val, b.val)); } \
|
||||
inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_cmple(a.val, b.val)); } \
|
||||
inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_cmpge(a.val, b.val)); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint8x16)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int8x16)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint16x8)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int16x8)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint32x4)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int32x4)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float32x4)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_float64x2)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_uint64x2)
|
||||
OPENCV_HAL_IMPL_VSX_INT_CMP_OP(v_int64x2)
|
||||
|
||||
/** min/max **/
|
||||
// Generic lane-wise minimum / maximum: thin templates over vec_min / vec_max.
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_min, vec_min)
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_max, vec_max)
|
||||
|
||||
/** Rotate **/
|
||||
#define OPENCV_IMPL_VSX_ROTATE(_Tpvec, suffix, shf, cast) \
|
||||
template<int imm> \
|
||||
inline _Tpvec v_rotate_##suffix(const _Tpvec& a) \
|
||||
{ \
|
||||
const int wd = imm * sizeof(typename _Tpvec::lane_type); \
|
||||
if (wd > 15) \
|
||||
return _Tpvec(); \
|
||||
return _Tpvec((cast)shf(vec_uchar16_c(a.val), vec_uchar16_sp(wd << 3))); \
|
||||
}
|
||||
|
||||
#define OPENCV_IMPL_VSX_ROTATE_LR(_Tpvec, cast) \
|
||||
OPENCV_IMPL_VSX_ROTATE(_Tpvec, left, vec_slo, cast) \
|
||||
OPENCV_IMPL_VSX_ROTATE(_Tpvec, right, vec_sro, cast)
|
||||
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_uint8x16, vec_uchar16)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_int8x16, vec_char16)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_uint16x8, vec_ushort8)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_int16x8, vec_short8)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_uint32x4, vec_uint4)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_int32x4, vec_int4)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_uint64x2, vec_udword2)
|
||||
OPENCV_IMPL_VSX_ROTATE_LR(v_int64x2, vec_dword2)
|
||||
|
||||
|
||||
/** Two-vector rotate right by `imm` lanes.
 *  The byte amount passed to vec_sld(b, a, ...) is 16 - imm * lane_size.
 *  With imm == 0 that amount is 16 and `a` is returned unchanged.
 */
template<int imm, typename _Tpvec>
inline _Tpvec v_rotate_right(const _Tpvec& a, const _Tpvec& b)
{
    enum { kSldBytes = 16 - imm * (sizeof(typename _Tpvec::lane_type)) };
    if (kSldBytes != 16)
    {
#ifdef __IBMCPP__
        // The IBM compiler path masks the amount to 0..15.
        // NOTE(review): presumably because the literal is range-checked even
        // when this branch is not taken - confirm.
        return _Tpvec(vec_sld(b.val, a.val, kSldBytes & 15));
#else
        return _Tpvec(vec_sld(b.val, a.val, kSldBytes));
#endif
    }
    return a;
}
|
||||
|
||||
/** Two-vector rotate left by `imm` lanes.
 *  The byte amount passed to vec_sld(a, b, ...) is imm * lane_size.
 *  When that amount is the full 16 bytes, `b` is returned unchanged.
 */
template<int imm, typename _Tpvec>
inline _Tpvec v_rotate_left(const _Tpvec& a, const _Tpvec& b)
{
    enum { kSldBytes = imm * (sizeof(typename _Tpvec::lane_type)) };
    if (kSldBytes != 16)
        return _Tpvec(vec_sld(a.val, b.val, kSldBytes));
    return b;
}
|
||||
|
||||
#define OPENCV_IMPL_VSX_ROTATE_64(_Tpvec, suffix, rg1, rg2) \
|
||||
template<int imm> \
|
||||
inline _Tpvec v_rotate_##suffix(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ \
|
||||
if (imm == 1) \
|
||||
return _Tpvec(vec_permi(rg1.val, rg2.val, 2)); \
|
||||
return imm ? b : a; \
|
||||
}
|
||||
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, right, a, b)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, right, a, b)
|
||||
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_int64x2, left, b, a)
|
||||
OPENCV_IMPL_VSX_ROTATE_64(v_uint64x2, left, b, a)
|
||||
|
||||
////////// Reduce and mask /////////
|
||||
|
||||
/** Reduce **/
|
||||
inline short v_reduce_sum(const v_int16x8& a)
|
||||
{
|
||||
const vec_int4 zero = vec_int4_z;
|
||||
return saturate_cast<short>(vec_extract(vec_sums(vec_sum4s(a.val, zero), zero), 3));
|
||||
}
|
||||
inline ushort v_reduce_sum(const v_uint16x8& a)
|
||||
{
|
||||
const vec_int4 v4 = vec_int4_c(vec_unpackhu(vec_adds(a.val, vec_sld(a.val, a.val, 8))));
|
||||
return saturate_cast<ushort>(vec_extract(vec_sums(v4, vec_int4_z), 3));
|
||||
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(_Tpvec, _Tpvec2, scalartype, suffix, func) \
|
||||
inline scalartype v_reduce_##suffix(const _Tpvec& a) \
|
||||
{ \
|
||||
const _Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \
|
||||
return vec_extract(func(rs, vec_sld(rs, rs, 4)), 0); \
|
||||
}
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, sum, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, max, vec_max)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_uint32x4, vec_uint4, uint, min, vec_min)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, sum, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, max, vec_max)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_int32x4, vec_int4, int, min, vec_min)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, sum, vec_add)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, max, vec_max)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_4(v_float32x4, vec_float4, float, min, vec_min)
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(_Tpvec, _Tpvec2, scalartype, suffix, func) \
|
||||
inline scalartype v_reduce_##suffix(const _Tpvec& a) \
|
||||
{ \
|
||||
_Tpvec2 rs = func(a.val, vec_sld(a.val, a.val, 8)); \
|
||||
rs = func(rs, vec_sld(rs, rs, 4)); \
|
||||
return vec_extract(func(rs, vec_sld(rs, rs, 2)), 0); \
|
||||
}
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, max, vec_max)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_uint16x8, vec_ushort8, ushort, min, vec_min)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, max, vec_max)
|
||||
OPENCV_HAL_IMPL_VSX_REDUCE_OP_8(v_int16x8, vec_short8, short, min, vec_min)
|
||||
|
||||
/** Horizontal sums of four float vectors at once.
 *  a is interleaved with c, and b with d; each interleaved pair is folded
 *  twice (merge-low + merge-high, then a rotation by 8 bytes), and the two
 *  results are interleaved again with vec_mergeh. The intent is a vector
 *  holding sum(a), sum(b), sum(c), sum(d) in that lane order.
 */
inline v_float32x4 v_reduce_sum4(const v_float32x4& a, const v_float32x4& b,
                                 const v_float32x4& c, const v_float32x4& d)
{
    const vec_float4 ac_lo = vec_mergel(a.val, c.val);
    const vec_float4 ac_hi = vec_mergeh(a.val, c.val);
    const vec_float4 ac_part = vec_add(ac_lo, ac_hi);
    const vec_float4 ac_sum = vec_add(ac_part, vec_sld(ac_part, ac_part, 8));

    const vec_float4 bd_lo = vec_mergel(b.val, d.val);
    const vec_float4 bd_hi = vec_mergeh(b.val, d.val);
    const vec_float4 bd_part = vec_add(bd_lo, bd_hi);
    const vec_float4 bd_sum = vec_add(bd_part, vec_sld(bd_part, bd_part, 8));

    return v_float32x4(vec_mergeh(ac_sum, bd_sum));
}
|
||||
|
||||
/** Popcount **/
|
||||
template<typename _Tpvec>
|
||||
inline v_uint32x4 v_popcount(const _Tpvec& a)
|
||||
{ return v_uint32x4(vec_popcntu(vec_uint4_c(a.val))); }
|
||||
|
||||
/** Mask **/
|
||||
inline int v_signmask(const v_uint8x16& a)
|
||||
{
|
||||
vec_uchar16 sv = vec_sr(a.val, vec_uchar16_sp(7));
|
||||
static const vec_uchar16 slm = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
|
||||
sv = vec_sl(sv, slm);
|
||||
vec_uint4 sv4 = vec_sum4s(sv, vec_uint4_z);
|
||||
static const vec_uint4 slm4 = {0, 0, 8, 8};
|
||||
sv4 = vec_sl(sv4, slm4);
|
||||
return vec_extract(vec_sums((vec_int4) sv4, vec_int4_z), 3);
|
||||
}
|
||||
inline int v_signmask(const v_int8x16& a)
|
||||
{ return v_signmask(v_reinterpret_as_u8(a)); }
|
||||
|
||||
inline int v_signmask(const v_int16x8& a)
|
||||
{
|
||||
static const vec_ushort8 slm = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
vec_short8 sv = vec_sr(a.val, vec_ushort8_sp(15));
|
||||
sv = vec_sl(sv, slm);
|
||||
vec_int4 svi = vec_int4_z;
|
||||
svi = vec_sums(vec_sum4s(sv, svi), svi);
|
||||
return vec_extract(svi, 3);
|
||||
}
|
||||
inline int v_signmask(const v_uint16x8& a)
|
||||
{ return v_signmask(v_reinterpret_as_s16(a)); }
|
||||
|
||||
inline int v_signmask(const v_int32x4& a)
|
||||
{
|
||||
static const vec_uint4 slm = {0, 1, 2, 3};
|
||||
vec_int4 sv = vec_sr(a.val, vec_uint4_sp(31));
|
||||
sv = vec_sl(sv, slm);
|
||||
sv = vec_sums(sv, vec_int4_z);
|
||||
return vec_extract(sv, 3);
|
||||
}
|
||||
inline int v_signmask(const v_uint32x4& a)
|
||||
{ return v_signmask(v_reinterpret_as_s32(a)); }
|
||||
inline int v_signmask(const v_float32x4& a)
|
||||
{ return v_signmask(v_reinterpret_as_s32(a)); }
|
||||
|
||||
inline int v_signmask(const v_int64x2& a)
|
||||
{
|
||||
VSX_UNUSED(const vec_dword2) sv = vec_sr(a.val, vec_udword2_sp(63));
|
||||
return (int)vec_extract(sv, 0) | (int)vec_extract(sv, 1) << 1;
|
||||
}
|
||||
inline int v_signmask(const v_uint64x2& a)
|
||||
{ return v_signmask(v_reinterpret_as_s64(a)); }
|
||||
inline int v_signmask(const v_float64x2& a)
|
||||
{ return v_signmask(v_reinterpret_as_s64(a)); }
|
||||
|
||||
|
||||
template<typename _Tpvec>
|
||||
inline bool v_check_all(const _Tpvec& a)
|
||||
{ return vec_all_lt(a.val, _Tpvec().val);}
|
||||
inline bool v_check_all(const v_uint8x16 &a)
|
||||
{ return v_check_all(v_reinterpret_as_s8(a)); }
|
||||
inline bool v_check_all(const v_uint16x8 &a)
|
||||
{ return v_check_all(v_reinterpret_as_s16(a)); }
|
||||
inline bool v_check_all(const v_uint32x4 &a)
|
||||
{ return v_check_all(v_reinterpret_as_s32(a)); }
|
||||
|
||||
template<typename _Tpvec>
|
||||
inline bool v_check_any(const _Tpvec& a)
|
||||
{ return vec_any_lt(a.val, _Tpvec().val);}
|
||||
inline bool v_check_any(const v_uint8x16 &a)
|
||||
{ return v_check_any(v_reinterpret_as_s8(a)); }
|
||||
inline bool v_check_any(const v_uint16x8 &a)
|
||||
{ return v_check_any(v_reinterpret_as_s16(a)); }
|
||||
inline bool v_check_any(const v_uint32x4 &a)
|
||||
{ return v_check_any(v_reinterpret_as_s32(a)); }
|
||||
|
||||
////////// Other math /////////
|
||||
|
||||
/** Some frequent operations **/
|
||||
inline v_float32x4 v_sqrt(const v_float32x4& x)
|
||||
{ return v_float32x4(vec_sqrt(x.val)); }
|
||||
inline v_float64x2 v_sqrt(const v_float64x2& x)
|
||||
{ return v_float64x2(vec_sqrt(x.val)); }
|
||||
|
||||
inline v_float32x4 v_invsqrt(const v_float32x4& x)
|
||||
{ return v_float32x4(vec_rsqrt(x.val)); }
|
||||
inline v_float64x2 v_invsqrt(const v_float64x2& x)
|
||||
{ return v_float64x2(vec_rsqrt(x.val)); }
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_MULADD(_Tpvec) \
|
||||
inline _Tpvec v_magnitude(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_sqrt(vec_madd(a.val, a.val, vec_mul(b.val, b.val)))); } \
|
||||
inline _Tpvec v_sqr_magnitude(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec(vec_madd(a.val, a.val, vec_mul(b.val, b.val))); } \
|
||||
inline _Tpvec v_muladd(const _Tpvec& a, const _Tpvec& b, const _Tpvec& c) \
|
||||
{ return _Tpvec(vec_madd(a.val, b.val, c.val)); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_MULADD(v_float32x4)
|
||||
OPENCV_HAL_IMPL_VSX_MULADD(v_float64x2)
|
||||
|
||||
// TODO: exp, log, sin, cos
|
||||
|
||||
/** Absolute values **/
|
||||
inline v_uint8x16 v_abs(const v_int8x16& x)
|
||||
{ return v_uint8x16(vec_uchar16_c(vec_abs(x.val))); }
|
||||
|
||||
inline v_uint16x8 v_abs(const v_int16x8& x)
|
||||
{ return v_uint16x8(vec_ushort8_c(vec_abs(x.val))); }
|
||||
|
||||
inline v_uint32x4 v_abs(const v_int32x4& x)
|
||||
{ return v_uint32x4(vec_uint4_c(vec_abs(x.val))); }
|
||||
|
||||
inline v_float32x4 v_abs(const v_float32x4& x)
|
||||
{ return v_float32x4(vec_abs(x.val)); }
|
||||
|
||||
inline v_float64x2 v_abs(const v_float64x2& x)
|
||||
{ return v_float64x2(vec_abs(x.val)); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC(v_absdiff, vec_absd)
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
|
||||
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
|
||||
{ return _Tpvec2(cast(intrin(a.val, b.val))); }
|
||||
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int8x16, v_uint8x16, vec_uchar16_c, v_absdiff, vec_absd)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int16x8, v_uint16x8, vec_ushort8_c, v_absdiff, vec_absd)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int32x4, v_uint32x4, vec_uint4_c, v_absdiff, vec_absd)
|
||||
OPENCV_HAL_IMPL_VSX_BIN_FUNC2(v_int64x2, v_uint64x2, vec_udword2_c, v_absdiff, vec_absd)
|
||||
|
||||
////////// Conversions /////////
|
||||
|
||||
/** Rounding **/
|
||||
/** Rounds each float lane with vec_round, then converts to int32 with vec_cts.
 *  NOTE(review): the tie-breaking rule is whatever vec_round implements;
 *  confirm it matches the rounding mode expected from v_round elsewhere.
 */
inline v_int32x4 v_round(const v_float32x4& a)
{
    const vec_float4 rounded = vec_round(a.val);
    return v_int32x4(vec_cts(rounded));
}

/** Double overload: rounds with vec_round, converts with vec_ctso and merges
 *  the converted values with a zero vector through vec_mergesqo.
 */
inline v_int32x4 v_round(const v_float64x2& a)
{
    const vec_double2 rounded = vec_round(a.val);
    return v_int32x4(vec_mergesqo(vec_ctso(rounded), vec_int4_z));
}
|
||||
|
||||
/** Applies vec_floor to each float lane, then converts to int32 with vec_cts. */
inline v_int32x4 v_floor(const v_float32x4& a)
{
    const vec_float4 floored = vec_floor(a.val);
    return v_int32x4(vec_cts(floored));
}

/** Double overload: vec_floor, then vec_ctso, merged with a zero vector
 *  through vec_mergesqo.
 */
inline v_int32x4 v_floor(const v_float64x2& a)
{
    const vec_double2 floored = vec_floor(a.val);
    return v_int32x4(vec_mergesqo(vec_ctso(floored), vec_int4_z));
}
|
||||
|
||||
/** Applies vec_ceil to each float lane, then converts to int32 with vec_cts. */
inline v_int32x4 v_ceil(const v_float32x4& a)
{
    const vec_float4 ceiled = vec_ceil(a.val);
    return v_int32x4(vec_cts(ceiled));
}

/** Double overload: vec_ceil, then vec_ctso, merged with a zero vector
 *  through vec_mergesqo.
 */
inline v_int32x4 v_ceil(const v_float64x2& a)
{
    const vec_double2 ceiled = vec_ceil(a.val);
    return v_int32x4(vec_mergesqo(vec_ctso(ceiled), vec_int4_z));
}
|
||||
|
||||
/** Converts each float lane to int32 directly with vec_cts (no prior rounding step). */
inline v_int32x4 v_trunc(const v_float32x4& a)
{
    const vec_int4 converted = vec_cts(a.val);
    return v_int32x4(converted);
}

/** Double overload: converts with vec_ctso and merges the result with a zero
 *  vector through vec_mergesqo.
 */
inline v_int32x4 v_trunc(const v_float64x2& a)
{
    const vec_int4 converted = vec_ctso(a.val);
    return v_int32x4(vec_mergesqo(converted, vec_int4_z));
}
|
||||
|
||||
/** To float **/
|
||||
/** Converts four int32 lanes to float with vec_ctf. */
inline v_float32x4 v_cvt_f32(const v_int32x4& a)
{
    const vec_float4 as_float = vec_ctf(a.val);
    return v_float32x4(as_float);
}

/** Narrows two doubles to float with vec_cvfo and merges the result with a
 *  zero vector through vec_mergesqo.
 */
inline v_float32x4 v_cvt_f32(const v_float64x2& a)
{
    const vec_float4 narrowed = vec_cvfo(a.val);
    return v_float32x4(vec_mergesqo(narrowed, vec_float4_z));
}
|
||||
|
||||
// All four conversions below first duplicate the wanted pair of 32-bit lanes
// by merging the input with itself (vec_mergeh for the plain variant,
// vec_mergel for the _high variant) and then widen with vec_ctdo (from int)
// or vec_cvfo (from float).

/** int32 -> double, using the vec_mergeh half of the input. */
inline v_float64x2 v_cvt_f64(const v_int32x4& a)
{
    const vec_int4 dup = vec_mergeh(a.val, a.val);
    return v_float64x2(vec_ctdo(dup));
}

/** int32 -> double, using the vec_mergel half of the input. */
inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
{
    const vec_int4 dup = vec_mergel(a.val, a.val);
    return v_float64x2(vec_ctdo(dup));
}

/** float -> double, using the vec_mergeh half of the input. */
inline v_float64x2 v_cvt_f64(const v_float32x4& a)
{
    const vec_float4 dup = vec_mergeh(a.val, a.val);
    return v_float64x2(vec_cvfo(dup));
}

/** float -> double, using the vec_mergel half of the input. */
inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
{
    const vec_float4 dup = vec_mergel(a.val, a.val);
    return v_float64x2(vec_cvfo(dup));
}
|
||||
|
||||
/** Reinterpret **/
|
||||
/** Defined earlier in this file, together with the load and store operations **/
|
||||
|
||||
////////// Matrix operations /////////
|
||||
|
||||
/** Dot product of 16-bit lanes into 32-bit lanes: vec_msum of a and b with a
 *  zero accumulator.
 */
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{
    const vec_int4 acc0 = vec_int4_z;
    return v_int32x4(vec_msum(a.val, b.val, acc0));
}
|
||||
|
||||
/** Computes v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3.
 *  Each lane of `v` is broadcast with vec_splat and the terms are accumulated
 *  with a chain of vec_madd, starting from the v[3]*m3 product.
 */
inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
                            const v_float32x4& m1, const v_float32x4& m2,
                            const v_float32x4& m3)
{
    const vec_float4 v0 = vec_splat(v.val, 0);
    const vec_float4 v1 = vec_splat(v.val, 1);
    const vec_float4 v2 = vec_splat(v.val, 2);
    VSX_UNUSED(const vec_float4) v3 = vec_splat(v.val, 3);

    vec_float4 acc = vec_mul(v3, m3.val);
    acc = vec_madd(v2, m2.val, acc);
    acc = vec_madd(v1, m1.val, acc);
    acc = vec_madd(v0, m0.val, acc);
    return v_float32x4(acc);
}
|
||||
|
||||
/** Computes v[0]*m0 + v[1]*m1 + v[2]*m2 + a.
 *  Only lanes 0..2 of `v` are used; `a` seeds the vec_madd accumulation chain.
 */
inline v_float32x4 v_matmuladd(const v_float32x4& v, const v_float32x4& m0,
                               const v_float32x4& m1, const v_float32x4& m2,
                               const v_float32x4& a)
{
    const vec_float4 v0 = vec_splat(v.val, 0);
    const vec_float4 v1 = vec_splat(v.val, 1);
    const vec_float4 v2 = vec_splat(v.val, 2);

    vec_float4 acc = vec_madd(v2, m2.val, a.val);
    acc = vec_madd(v1, m1.val, acc);
    acc = vec_madd(v0, m0.val, acc);
    return v_float32x4(acc);
}
|
||||
|
||||
#define OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(_Tpvec, _Tpvec2) \
|
||||
inline void v_transpose4x4(const _Tpvec& a0, const _Tpvec& a1, \
|
||||
const _Tpvec& a2, const _Tpvec& a3, \
|
||||
_Tpvec& b0, _Tpvec& b1, _Tpvec& b2, _Tpvec& b3) \
|
||||
{ \
|
||||
_Tpvec2 a02 = vec_mergeh(a0.val, a2.val); \
|
||||
_Tpvec2 a13 = vec_mergeh(a1.val, a3.val); \
|
||||
b0.val = vec_mergeh(a02, a13); \
|
||||
b1.val = vec_mergel(a02, a13); \
|
||||
a02 = vec_mergel(a0.val, a2.val); \
|
||||
a13 = vec_mergel(a1.val, a3.val); \
|
||||
b2.val = vec_mergeh(a02, a13); \
|
||||
b3.val = vec_mergel(a02, a13); \
|
||||
}
|
||||
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4)
|
||||
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
|
||||
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)
|
||||
|
||||
//! @name Check SIMD support
|
||||
//! @{
|
||||
//! @brief Check CPU capability of SIMD operation
|
||||
static inline bool hasSIMD128()
|
||||
{
|
||||
return (CV_CPU_HAS_SUPPORT_VSX) ? true : false;
|
||||
}
|
||||
|
||||
//! @}
|
||||
|
||||
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
|
||||
|
||||
//! @endcond
|
||||
|
||||
}
|
||||
|
||||
#endif // OPENCV_HAL_VSX_HPP
|
Reference in New Issue
Block a user