Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit eefa327

Browse files
authored
Merge pull request opencv#27042 from fengyuentau:4x/core/normDiff_simd
core: vectorize normDiff with universal intrinsics opencv#27042

Merge with opencv/opencv_extra#1242.

Performance results on Desktop Intel i7-12700K, Apple M2, Jetson Orin and SpaceMIT K1: [perf-normDiff.zip](https://github.com/user-attachments/files/19178689/perf-normDiff.zip)

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [x] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
1 parent 46dbc57 commit eefa327
Copy full SHA for eefa327

File tree

Expand file tree / Collapse file tree

3 files changed

+754
-134
lines changed
Filter options
Expand file tree / Collapse file tree

3 files changed

+754
-134
lines changed

‎modules/core/perf/perf_norm.cpp

Copy file name to clipboard | Expand all lines: modules/core/perf/perf_norm.cpp
+2 -2 | Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ PERF_TEST_P(Size_MatType_NormType, norm_mask,
5959
PERF_TEST_P(Size_MatType_NormType, norm2,
6060
testing::Combine(
6161
testing::Values(TYPICAL_MAT_SIZES),
62-
testing::Values(TYPICAL_MAT_TYPES),
62+
testing::Values(CV_8UC1, CV_8UC4, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1),
6363
testing::Values((int)NORM_INF, (int)NORM_L1, (int)NORM_L2, (int)(NORM_RELATIVE+NORM_INF), (int)(NORM_RELATIVE+NORM_L1), (int)(NORM_RELATIVE+NORM_L2))
6464
)
6565
)
@@ -82,7 +82,7 @@ PERF_TEST_P(Size_MatType_NormType, norm2,
8282
PERF_TEST_P(Size_MatType_NormType, norm2_mask,
8383
testing::Combine(
8484
testing::Values(TYPICAL_MAT_SIZES),
85-
testing::Values(TYPICAL_MAT_TYPES),
85+
testing::Values(CV_8UC1, CV_8UC4, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1),
8686
testing::Values((int)NORM_INF, (int)NORM_L1, (int)NORM_L2, (int)(NORM_RELATIVE|NORM_INF), (int)(NORM_RELATIVE|NORM_L1), (int)(NORM_RELATIVE|NORM_L2))
8787
)
8888
)

‎modules/core/src/norm.dispatch.cpp

Copy file name to clipboard | Expand all lines: modules/core/src/norm.dispatch.cpp
+13 -132 | Lines changed: 13 additions & 132 deletions
Original file line number | Diff line number | Diff line change
@@ -218,120 +218,9 @@ int normL1_(const uchar* a, const uchar* b, int n)
218218

219219
//==================================================================================================
220220

221-
template<typename T, typename ST> int
222-
normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
223-
{
224-
ST result = *_result;
225-
if( !mask )
226-
{
227-
result = std::max(result, normInf<T, ST>(src1, src2, len*cn));
228-
}
229-
else
230-
{
231-
for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
232-
if( mask[i] )
233-
{
234-
for( int k = 0; k < cn; k++ )
235-
result = std::max(result, (ST)std::abs(src1[k] - src2[k]));
236-
}
237-
}
238-
*_result = result;
239-
return 0;
240-
}
241-
242-
template<typename T, typename ST> int
243-
normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
244-
{
245-
ST result = *_result;
246-
if( !mask )
247-
{
248-
result += normL1<T, ST>(src1, src2, len*cn);
249-
}
250-
else
251-
{
252-
for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
253-
if( mask[i] )
254-
{
255-
for( int k = 0; k < cn; k++ )
256-
result += std::abs(src1[k] - src2[k]);
257-
}
258-
}
259-
*_result = result;
260-
return 0;
261-
}
262-
263-
template<typename T, typename ST> int
264-
normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn)
265-
{
266-
ST result = *_result;
267-
if( !mask )
268-
{
269-
result += normL2Sqr<T, ST>(src1, src2, len*cn);
270-
}
271-
else
272-
{
273-
for( int i = 0; i < len; i++, src1 += cn, src2 += cn )
274-
if( mask[i] )
275-
{
276-
for( int k = 0; k < cn; k++ )
277-
{
278-
ST v = src1[k] - src2[k];
279-
result += v*v;
280-
}
281-
}
282-
}
283-
*_result = result;
284-
return 0;
285-
}
286-
287-
#define CV_DEF_NORM_DIFF_FUNC(L, suffix, type, ntype) \
288-
static int normDiff##L##_##suffix(const type* src1, const type* src2, \
289-
const uchar* mask, ntype* r, int len, int cn) \
290-
{ return normDiff##L##_(src1, src2, mask, r, (int)len, cn); }
291-
292-
#define CV_DEF_NORM_DIFF_ALL(suffix, type, inftype, l1type, l2type) \
293-
CV_DEF_NORM_DIFF_FUNC(Inf, suffix, type, inftype) \
294-
CV_DEF_NORM_DIFF_FUNC(L1, suffix, type, l1type) \
295-
CV_DEF_NORM_DIFF_FUNC(L2, suffix, type, l2type)
296-
297-
CV_DEF_NORM_DIFF_ALL(8u, uchar, int, int, int)
298-
CV_DEF_NORM_DIFF_ALL(8s, schar, int, int, int)
299-
CV_DEF_NORM_DIFF_ALL(16u, ushort, int, int, double)
300-
CV_DEF_NORM_DIFF_ALL(16s, short, int, int, double)
301-
CV_DEF_NORM_DIFF_ALL(32s, int, int, double, double)
302-
CV_DEF_NORM_DIFF_ALL(32f, float, float, double, double)
303-
CV_DEF_NORM_DIFF_ALL(64f, double, double, double, double)
304-
305221
typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int);
306222
typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int);
307223

308-
static NormDiffFunc getNormDiffFunc(int normType, int depth)
309-
{
310-
static NormDiffFunc normDiffTab[3][8] =
311-
{
312-
{
313-
(NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s,
314-
(NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s,
315-
(NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f),
316-
(NormDiffFunc)normDiffInf_64f, 0
317-
},
318-
{
319-
(NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s,
320-
(NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s,
321-
(NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f),
322-
(NormDiffFunc)normDiffL1_64f, 0
323-
},
324-
{
325-
(NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s,
326-
(NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s,
327-
(NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f),
328-
(NormDiffFunc)normDiffL2_64f, 0
329-
}
330-
};
331-
332-
return normDiffTab[normType][depth];
333-
}
334-
335224
#ifdef HAVE_OPENCL
336225

337226
static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result )
@@ -520,6 +409,10 @@ static NormFunc getNormFunc(int normType, int depth) {
520409
CV_INSTRUMENT_REGION();
521410
CV_CPU_DISPATCH(getNormFunc, (normType, depth), CV_CPU_DISPATCH_MODES_ALL);
522411
}
412+
static NormDiffFunc getNormDiffFunc(int normType, int depth) {
413+
CV_INSTRUMENT_REGION();
414+
CV_CPU_DISPATCH(getNormDiffFunc, (normType, depth), CV_CPU_DISPATCH_MODES_ALL);
415+
}
523416

524417
double norm( InputArray _src, int normType, InputArray _mask )
525418
{
@@ -1050,38 +943,29 @@ double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask
1050943
normType == NORM_L2 || normType == NORM_L2SQR ||
1051944
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
1052945

946+
NormDiffFunc func = getNormDiffFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
947+
CV_Assert( func != 0 );
948+
1053949
if( src1.isContinuous() && src2.isContinuous() && mask.empty() )
1054950
{
1055951
size_t len = src1.total()*src1.channels();
1056952
if( len == (size_t)(int)len )
1057953
{
1058954
if( src1.depth() == CV_32F )
1059955
{
1060-
const float* data1 = src1.ptr<float>();
1061-
const float* data2 = src2.ptr<float>();
956+
const uchar* data1 = src1.ptr<const uchar>();
957+
const uchar* data2 = src2.ptr<const uchar>();
1062958

1063-
if( normType == NORM_L2 )
1064-
{
1065-
double result = 0;
1066-
GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1067-
return std::sqrt(result);
1068-
}
1069-
if( normType == NORM_L2SQR )
1070-
{
1071-
double result = 0;
1072-
GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1);
1073-
return result;
1074-
}
1075-
if( normType == NORM_L1 )
959+
if( normType == NORM_L2 || normType == NORM_L2SQR || normType == NORM_L1 )
1076960
{
1077961
double result = 0;
1078-
GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1);
1079-
return result;
962+
func(data1, data2, 0, (uchar*)&result, (int)len, 1);
963+
return normType == NORM_L2 ? std::sqrt(result) : result;
1080964
}
1081965
if( normType == NORM_INF )
1082966
{
1083967
float result = 0;
1084-
GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1);
968+
func(data1, data2, 0, (uchar*)&result, (int)len, 1);
1085969
return result;
1086970
}
1087971
}
@@ -1115,9 +999,6 @@ double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask
1115999
return result;
11161000
}
11171001

1118-
NormDiffFunc func = getNormDiffFunc(normType >> 1, depth == CV_16F ? CV_32F : depth);
1119-
CV_Assert( func != 0 );
1120-
11211002
const Mat* arrays[] = {&src1, &src2, &mask, 0};
11221003
uchar* ptrs[3] = {};
11231004
union

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.