@@ -92,7 +92,11 @@ namespace
92
92
{
93
93
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_type;
94
94
95
+ #if USE_NPP_STREAM_CTX // func_t: pointer to a function returning NppStatus; this variant takes a trailing NppStreamContext argument.
96
+ typedef NppStatus (*func_t )(const npp_type* pSrc1, int nSrc1Step, const npp_type* pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI, NppStreamContext ctx);
97
+ #else // Without USE_NPP_STREAM_CTX: same parameter list, minus the NppStreamContext argument.
95
98
typedef NppStatus (*func_t )(const npp_type* pSrc1, int nSrc1Step, const npp_type* pConstants, npp_type* pDst, int nDstStep, NppiSize oSizeROI);
99
+ #endif // USE_NPP_STREAM_CTX
96
100
};
97
101
98
102
template <int DEPTH, int cn, typename NppBitwiseCFunc<DEPTH, cn>::func_t func> struct NppBitwiseC
@@ -116,7 +120,11 @@ namespace
116
120
cv::saturate_cast<npp_type>(value[3 ])
117
121
};
118
122
123
+ #if USE_NPP_STREAM_CTX
124
+ nppSafeCall (func (src.ptr <npp_type>(), static_cast <int >(src.step ), pConstants, dst.ptr <npp_type>(), static_cast <int >(dst.step ), oSizeROI, h));
125
+ #else
119
126
nppSafeCall ( func (src.ptr <npp_type>(), static_cast <int >(src.step ), pConstants, dst.ptr <npp_type>(), static_cast <int >(dst.step ), oSizeROI) );
127
+ #endif
120
128
121
129
if (stream == 0 )
122
130
CV_CUDEV_SAFE_CALL ( cudaDeviceSynchronize () );
@@ -131,13 +139,39 @@ void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const Gpu
131
139
typedef void (*func_t )(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream);
132
140
static const func_t funcs[3 ][6 ][4 ] =
133
141
{
142
+ #if USE_NPP_STREAM_CTX
143
+ {
144
+ {BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
145
+ {BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
146
+ {BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R_Ctx>::call},
147
+ {BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R_Ctx>::call},
148
+ {BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R_Ctx>::call},
149
+ {BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R_Ctx>::call}
150
+ },
134
151
{
135
- {BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
136
- {BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call},
137
- {BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R>::call},
138
- {BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R>::call},
139
- {BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R>::call},
140
- {BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R>::call}
152
+ {BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiOrC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
153
+ {BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiOrC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
154
+ {BitScalar<ushort, bitScalarOp<bit_or, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiOrC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiOrC_16u_C4R_Ctx>::call},
155
+ {BitScalar<ushort, bitScalarOp<bit_or, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiOrC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiOrC_16u_C4R_Ctx>::call},
156
+ {BitScalar<uint, bitScalarOp<bit_or, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiOrC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiOrC_32s_C4R_Ctx>::call},
157
+ {BitScalar<uint, bitScalarOp<bit_or, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiOrC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiOrC_32s_C4R_Ctx>::call}
158
+ },
159
+ {
160
+ {BitScalar<uchar, bitScalarOp<bit_xor, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiXorC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_xor, uint> >::call},
161
+ {BitScalar<uchar, bitScalarOp<bit_xor, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiXorC_8u_C3R_Ctx >::call, BitScalar4< bitScalarOp<bit_xor, uint> >::call},
162
+ {BitScalar<ushort, bitScalarOp<bit_xor, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiXorC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiXorC_16u_C4R_Ctx>::call},
163
+ {BitScalar<ushort, bitScalarOp<bit_xor, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiXorC_16u_C3R_Ctx>::call, NppBitwiseC<CV_16U, 4 , nppiXorC_16u_C4R_Ctx>::call},
164
+ {BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiXorC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiXorC_32s_C4R_Ctx>::call},
165
+ {BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiXorC_32s_C3R_Ctx>::call, NppBitwiseC<CV_32S, 4 , nppiXorC_32s_C4R_Ctx>::call}
166
+ }
167
+ #else
168
+ {
169
+ { BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call, 0 , NppBitwiseC<CV_8U, 3 , nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call },
170
+ { BitScalar<uchar, bitScalarOp<bit_and, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiAndC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_and, uint> >::call },
171
+ { BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R>::call },
172
+ { BitScalar<ushort, bitScalarOp<bit_and, ushort> >::call, 0 , NppBitwiseC<CV_16U, 3 , nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4 , nppiAndC_16u_C4R>::call },
173
+ { BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R>::call },
174
+ { BitScalar<uint, bitScalarOp<bit_and, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiAndC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiAndC_32s_C4R>::call }
141
175
},
142
176
{
143
177
{BitScalar<uchar, bitScalarOp<bit_or, uchar> >::call , 0 , NppBitwiseC<CV_8U , 3 , nppiOrC_8u_C3R >::call, BitScalar4< bitScalarOp<bit_or, uint> >::call},
@@ -155,6 +189,7 @@ void bitScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const Gpu
155
189
{BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiXorC_32s_C4R>::call},
156
190
{BitScalar<uint, bitScalarOp<bit_xor, uint> >::call , 0 , NppBitwiseC<CV_32S, 3 , nppiXorC_32s_C3R>::call, NppBitwiseC<CV_32S, 4 , nppiXorC_32s_C4R>::call}
157
191
}
192
+ #endif
158
193
};
159
194
160
195
const int depth = src.depth ();
0 commit comments