From eebd8977e158d81a4aaf41653a038b82cf6507d4 Mon Sep 17 00:00:00 2001
From: quic-xuezha <quic_xuezha@quicinc.com>
Date: Wed, 7 May 2025 18:45:02 +0800
Subject: [PATCH 1/4] Merge pull request #3922 from CodeLinaro:xuezha_4thPost

Add WarpPerspective in FastCV extension #3922

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 .../fastcv/include/opencv2/fastcv/warp.hpp    |  16 +-
 modules/fastcv/perf/perf_warp.cpp             |  78 +++++---
 modules/fastcv/src/warp.cpp                   | 175 ++++++++++++++----
 modules/fastcv/test/test_warp.cpp             |  94 +++++++---
 4 files changed, 277 insertions(+), 86 deletions(-)
diff --git a/modules/fastcv/include/opencv2/fastcv/warp.hpp b/modules/fastcv/include/opencv2/fastcv/warp.hpp
index 8f58cd36577..2c62b0cb313 100644
--- a/modules/fastcv/include/opencv2/fastcv/warp.hpp
+++ b/modules/fastcv/include/opencv2/fastcv/warp.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -17,6 +17,20 @@ namespace fastcv {
 //! @addtogroup fastcv
 //! @{
 
+/**
+ * @brief   Transform an image using perspective transformation, same as cv::warpPerspective but not bit-exact.
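+ * @param _src          Input 8-bit image (depth CV_8U).
+ * @param _dst          Output 8-bit image of size dsize and the same type as _src. In-place operation is not supported.
+ * @param _M0           3x3 perspective transformation matrix (CV_32F or CV_64F), used as the inverse (destination-to-source) mapping, matching cv::warpPerspective called with cv::WARP_INVERSE_MAP.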
+ * @param dsize         Size of the output image.
+ * @param interpolation Interpolation method. Only cv::INTER_NEAREST, cv::INTER_LINEAR and cv::INTER_AREA are supported.
+ * @param borderType    Pixel extrapolation method. Only cv::BORDER_CONSTANT, cv::BORDER_REPLICATE and cv::BORDER_TRANSPARENT
+ *                      are supported.
+ * @param borderValue   Value used in case of a constant border (cv::BORDER_CONSTANT); all four components must be equal.
+ */
+CV_EXPORTS_W void warpPerspective(InputArray _src, OutputArray _dst, InputArray _M0, Size dsize, int interpolation, int borderType,
+    const Scalar&  borderValue);
+
 /**
  * @brief Perspective warp two images using the same transformation. Bi-linear interpolation is used where applicable.
  *        For example, to warp a grayscale image and an alpha image at the same time, or warp two color channels.
diff --git a/modules/fastcv/perf/perf_warp.cpp b/modules/fastcv/perf/perf_warp.cpp
index 231056aef56..a2ec2b65cee 100644
--- a/modules/fastcv/perf/perf_warp.cpp
+++ b/modules/fastcv/perf/perf_warp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -7,31 +7,19 @@
 
 namespace opencv_test {
 
-typedef perf::TestBaseWithParam<Size> WarpPerspective2PlanePerfTest;
-
-PERF_TEST_P(WarpPerspective2PlanePerfTest, run,
-    ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p))
+static void getInvertMatrix(Mat& src, Size dstSize, Mat& M)
 {
-    cv::Size dstSize = GetParam();
-    cv::Mat img = imread(cvtest::findDataFile("cv/shared/baboon.png"));
-    Mat src(img.rows, img.cols, CV_8UC1);
-    cvtColor(img,src,cv::COLOR_BGR2GRAY);
-    cv::Mat dst1, dst2, mat;
-    mat.create(3,3,CV_32FC1);
-    dst1.create(dstSize,CV_8UC1);
-    dst2.create(dstSize,CV_8UC1);
-
     RNG& rng = cv::theRNG();
     Point2f s[4], d[4];
 
     s[0] = Point2f(0,0);
     d[0] = Point2f(0,0);
     s[1] = Point2f(src.cols-1.f,0);
-    d[1] = Point2f(dst1.cols-1.f,0);
+    d[1] = Point2f(dstSize.width-1.f,0);
     s[2] = Point2f(src.cols-1.f,src.rows-1.f);
-    d[2] = Point2f(dst1.cols-1.f,dst1.rows-1.f);
+    d[2] = Point2f(dstSize.width-1.f,dstSize.height-1.f);
     s[3] = Point2f(0,src.rows-1.f);
-    d[3] = Point2f(0,dst1.rows-1.f);
+    d[3] = Point2f(0,dstSize.height-1.f);
 
     float buffer[16];
     Mat tmp( 1, 16, CV_32FC1, buffer );
@@ -41,18 +29,64 @@ PERF_TEST_P(WarpPerspective2PlanePerfTest, run,
     {
         s[i].x += buffer[i*4]*src.cols/2;
         s[i].y += buffer[i*4+1]*src.rows/2;
-        d[i].x += buffer[i*4+2]*dst1.cols/2;
-        d[i].y += buffer[i*4+3]*dst1.rows/2;
+        d[i].x += buffer[i*4+2]*dstSize.width/2;
+        d[i].y += buffer[i*4+3]*dstSize.height/2;
     }
 
-    cv::getPerspectiveTransform( s, d ).convertTo( mat, mat.depth() );
+    cv::getPerspectiveTransform( s, d ).convertTo( M, M.depth() );
+
     // Invert the perspective matrix
-    invert(mat,mat);
+    invert(M,M);
+}
+
+typedef perf::TestBaseWithParam<Size> WarpPerspective2PlanePerfTest;
+
+PERF_TEST_P(WarpPerspective2PlanePerfTest, run,
+    ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p))
+{
+    cv::Size dstSize = GetParam();
+    cv::Mat img = imread(cvtest::findDataFile("cv/shared/baboon.png"));
+    Mat src(img.rows, img.cols, CV_8UC1);
+    cvtColor(img,src,cv::COLOR_BGR2GRAY);
+    cv::Mat dst1, dst2, matrix;
+    matrix.create(3,3,CV_32FC1);
+
+    getInvertMatrix(src, dstSize, matrix);
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::warpPerspective2Plane(src, src, dst1, dst2, matrix, dstSize);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+typedef perf::TestBaseWithParam<tuple<Size, int, int>> WarpPerspectivePerfTest;
+
+PERF_TEST_P(WarpPerspectivePerfTest, run,
+    ::testing::Combine( ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p),
+                        ::testing::Values(INTER_NEAREST, INTER_LINEAR, INTER_AREA),
+                        ::testing::Values(BORDER_CONSTANT, BORDER_REPLICATE, BORDER_TRANSPARENT)))
+{
+    cv::Size dstSize = get<0>(GetParam());
+    int interplation = get<1>(GetParam());
+    int borderType   = get<2>(GetParam());
+    cv::Scalar borderValue = Scalar::all(100);
+
+    cv::Mat src = imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    EXPECT_FALSE(src.empty());
+
+    cv::Mat dst, matrix, ref;
+    matrix.create(3, 3, CV_32FC1);
+
+    getInvertMatrix(src, dstSize, matrix);
 
     while (next())
     {
         startTimer();
-        cv::fastcv::warpPerspective2Plane(src, src, dst1, dst2, mat, dstSize);
+        cv::fastcv::warpPerspective(src, dst, matrix, dstSize, interplation, borderType, borderValue);
         stopTimer();
     }
 
diff --git a/modules/fastcv/src/warp.cpp b/modules/fastcv/src/warp.cpp
index 01f83bdf510..ac806ffc4ae 100644
--- a/modules/fastcv/src/warp.cpp
+++ b/modules/fastcv/src/warp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -12,49 +12,52 @@ class FcvWarpPerspectiveLoop_Invoker : public cv::ParallelLoopBody
 {
     public:
 
-    FcvWarpPerspectiveLoop_Invoker(InputArray _src1, InputArray _src2, OutputArray _dst1, OutputArray _dst2, InputArray _M0,
-        Size _dsize) : cv::ParallelLoopBody()
-    {
-        src1 = _src1.getMat();
-        src2 = _src2.getMat();
-        dsize = _dsize;
-
-        _dst1.create(dsize, src1.type());
-        _dst2.create(dsize, src2.type());
-        dst1 = _dst1.getMat();
-        dst2 = _dst2.getMat();
-
-        M = _M0.getMat();
-    }
+    FcvWarpPerspectiveLoop_Invoker(const Mat& _src1, const Mat& _src2, Mat& _dst1, Mat& _dst2,
+        const float * _M, fcvInterpolationType _interpolation = FASTCV_INTERPOLATION_TYPE_NEAREST_NEIGHBOR,
+        fcvBorderType _borderType = fcvBorderType::FASTCV_BORDER_UNDEFINED, const int _borderValue = 0)
+        : ParallelLoopBody(), src1(_src1), src2(_src2), dst1(_dst1), dst2(_dst2), M(_M), interpolation(_interpolation),
+        borderType(_borderType), borderValue(_borderValue)
+    {}
 
     virtual void operator()(const cv::Range& range) const CV_OVERRIDE
     {
-        uchar* dst1_ptr = dst1.data + range.start*dst1.step;
-        uchar* dst2_ptr = dst2.data + range.start*dst2.step;
+        uchar* dst1_ptr = dst1.data + range.start * dst1.step;
         int rangeHeight = range.end - range.start;
 
         float rangeMatrix[9];
-        rangeMatrix[0] = M.at<float>(0,0);
-        rangeMatrix[1] = M.at<float>(0,1);
-        rangeMatrix[2] = M.at<float>(0,2)+range.start*M.at<float>(0,1);
-        rangeMatrix[3] = M.at<float>(1,0);
-        rangeMatrix[4] = M.at<float>(1,1);
-        rangeMatrix[5] = M.at<float>(1,2)+range.start*M.at<float>(1,1);
-        rangeMatrix[6] = M.at<float>(2,0);
-        rangeMatrix[7] = M.at<float>(2,1);
-        rangeMatrix[8] = M.at<float>(2,2)+range.start*M.at<float>(2,1);
-
-        fcv2PlaneWarpPerspectiveu8(src1.data, src2.data, src1.cols, src1.rows, src1.step, src2.step, dst1_ptr, dst2_ptr,
-            dsize.width, rangeHeight, dst1.step, dst2.step, rangeMatrix);
+        rangeMatrix[0] = M[0];
+        rangeMatrix[1] = M[1];
+        rangeMatrix[2] = M[2]+range.start*M[1];
+        rangeMatrix[3] = M[3];
+        rangeMatrix[4] = M[4];
+        rangeMatrix[5] = M[5]+range.start*M[4];
+        rangeMatrix[6] = M[6];
+        rangeMatrix[7] = M[7];
+        rangeMatrix[8] = M[8]+range.start*M[7];
+
+        if ((src2.empty()) || (dst2.empty()))
+        {
+            fcvWarpPerspectiveu8_v5(src1.data, src1.cols, src1.rows, src1.step, src1.channels(), dst1_ptr, dst1.cols, rangeHeight,
+                dst1.step, rangeMatrix, interpolation, borderType, borderValue);
+        }
+        else
+        {
+            uchar* dst2_ptr = dst2.data + range.start * dst2.step;
+            fcv2PlaneWarpPerspectiveu8(src1.data, src2.data, src1.cols, src1.rows, src1.step, src2.step, dst1_ptr, dst2_ptr,
+                dst1.cols, rangeHeight, dst1.step, dst2.step, rangeMatrix);
+        }
     }
 
     private:
-    Mat         src1;
-    Mat         src2;
-    Mat         dst1;
-    Mat         dst2;
-    Mat         M;
-    Size        dsize;
+
+    const Mat&              src1;
+    const Mat&              src2;
+    Mat&                    dst1;
+    Mat&                    dst2;
+    const float*            M;
+    fcvInterpolationType    interpolation;
+    fcvBorderType           borderType;
+    int                     borderValue;
 
     FcvWarpPerspectiveLoop_Invoker(const FcvWarpPerspectiveLoop_Invoker &);  // = delete;
     const FcvWarpPerspectiveLoop_Invoker& operator= (const FcvWarpPerspectiveLoop_Invoker &);  // = delete;
@@ -68,8 +71,108 @@ void warpPerspective2Plane(InputArray _src1, InputArray _src2, OutputArray _dst1
     CV_Assert(!_src2.empty() && _src2.type() == CV_8UC1);
     CV_Assert(!_M0.empty());
 
+    Mat src1 = _src1.getMat();
+    Mat src2 = _src2.getMat();
+
+    _dst1.create(dsize, src1.type());
+    _dst2.create(dsize, src2.type());
+    Mat dst1 = _dst1.getMat();
+    Mat dst2 = _dst2.getMat();
+
+    Mat M0 = _M0.getMat();
+    CV_Assert((M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3);
+    float matrix[9];
+    Mat M(3, 3, CV_32F, matrix);
+    M0.convertTo(M, M.type());
+
+    int nThreads = getNumThreads();
+    int nStripes = nThreads > 1 ? 2*nThreads : 1;
+
+    cv::parallel_for_(cv::Range(0, dsize.height),
+        FcvWarpPerspectiveLoop_Invoker(src1, src2, dst1, dst2, matrix), nStripes);
+}
+
+void warpPerspective(InputArray _src, OutputArray _dst, InputArray _M0, Size dsize, int interpolation, int borderType,
+    const Scalar&  borderValue)
+{
+    Mat src = _src.getMat();
+
+    _dst.create(dsize, src.type());
+    Mat dst = _dst.getMat();
+
+    Mat M0 = _M0.getMat();
+    CV_Assert((M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3);
+    float matrix[9];
+    Mat M(3, 3, CV_32F, matrix);
+    M0.convertTo(M, M.type());
+
+    // In-place operation is not supported
+    CV_Assert(src.data != dst.data);
+    // Only support CV_8U
+    CV_Assert(src.depth() == CV_8U);
+
+    INITIALIZATION_CHECK;
+
+    fcvBorderType           fcvBorder;
+    uint8_t                 fcvBorderValue = 0;
+    fcvInterpolationType    fcvInterpolation;
+
+    switch (borderType)
+    {
+        case BORDER_CONSTANT:
+        {
+            // Border value should be same
+            CV_Assert((borderValue[0] == borderValue[1]) &&
+                      (borderValue[0] == borderValue[2]) &&
+                      (borderValue[0] == borderValue[3]));
+
+            fcvBorder       = fcvBorderType::FASTCV_BORDER_CONSTANT;
+            fcvBorderValue  = static_cast<uint8_t>(borderValue[0]);
+            break;
+        }
+        case BORDER_REPLICATE:
+        {
+            fcvBorder = fcvBorderType::FASTCV_BORDER_REPLICATE;
+            break;
+        }
+        case BORDER_TRANSPARENT:
+        {
+            fcvBorder = fcvBorderType::FASTCV_BORDER_UNDEFINED;
+            break;
+        }
+        default:
+            CV_Error(cv::Error::StsBadArg, cv::format("Border type:%d is not supported", borderType));
+    }
+
+    switch(interpolation)
+    {
+        case INTER_NEAREST:
+        {
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_NEAREST_NEIGHBOR;
+            break;
+        }
+        case INTER_LINEAR:
+        {
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_BILINEAR;
+            break;
+        }
+        case INTER_AREA:
+        {
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_AREA;
+            break;
+        }
+        default:
+            CV_Error(cv::Error::StsBadArg, cv::format("Interpolation type:%d is not supported", interpolation));
+    }
+
+    int nThreads = cv::getNumThreads();
+    int nStripes = nThreads > 1 ? 2*nThreads : 1;
+
+    // placeholder
+    Mat tmp;
+
     cv::parallel_for_(cv::Range(0, dsize.height),
-        FcvWarpPerspectiveLoop_Invoker(_src1, _src2, _dst1, _dst2, _M0, dsize), 1);
+        FcvWarpPerspectiveLoop_Invoker(src, tmp, dst, tmp, matrix, fcvInterpolation, fcvBorder, fcvBorderValue), nStripes);
 }
 
 } // fastcv::
diff --git a/modules/fastcv/test/test_warp.cpp b/modules/fastcv/test/test_warp.cpp
index 240262f93ca..a87902ad102 100644
--- a/modules/fastcv/test/test_warp.cpp
+++ b/modules/fastcv/test/test_warp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -7,30 +7,19 @@
 
 namespace opencv_test { namespace {
 
-typedef testing::TestWithParam<cv::Size> WarpPerspective2Plane;
-
-TEST_P(WarpPerspective2Plane, accuracy)
+static void getInvertMatrix(Mat& src, Size dstSize, Mat& M)
 {
-    cv::Size dstSize = GetParam();
-    cv::Mat img = imread(cvtest::findDataFile("cv/shared/baboon.png"));
-    Mat src(img.rows, img.cols, CV_8UC1);
-    cvtColor(img,src,cv::COLOR_BGR2GRAY);
-    cv::Mat dst1, dst2, mat, ref1, ref2;
-    mat.create(3,3,CV_32FC1);
-    dst1.create(dstSize,CV_8UC1);
-    dst2.create(dstSize,CV_8UC1);
-
-    RNG rng = RNG((uint64)-1);
+    RNG& rng = cv::theRNG();
     Point2f s[4], d[4];
 
     s[0] = Point2f(0,0);
     d[0] = Point2f(0,0);
     s[1] = Point2f(src.cols-1.f,0);
-    d[1] = Point2f(dst1.cols-1.f,0);
+    d[1] = Point2f(dstSize.width-1.f,0);
     s[2] = Point2f(src.cols-1.f,src.rows-1.f);
-    d[2] = Point2f(dst1.cols-1.f,dst1.rows-1.f);
+    d[2] = Point2f(dstSize.width-1.f,dstSize.height-1.f);
     s[3] = Point2f(0,src.rows-1.f);
-    d[3] = Point2f(0,dst1.rows-1.f);
+    d[3] = Point2f(0,dstSize.height-1.f);
 
     float buffer[16];
     Mat tmp( 1, 16, CV_32FC1, buffer );
@@ -40,30 +29,81 @@ TEST_P(WarpPerspective2Plane, accuracy)
     {
         s[i].x += buffer[i*4]*src.cols/2;
         s[i].y += buffer[i*4+1]*src.rows/2;
-        d[i].x += buffer[i*4+2]*dst1.cols/2;
-        d[i].y += buffer[i*4+3]*dst1.rows/2;
+        d[i].x += buffer[i*4+2]*dstSize.width/2;
+        d[i].y += buffer[i*4+3]*dstSize.height/2;
     }
 
-    cv::getPerspectiveTransform( s, d ).convertTo( mat, mat.depth() );
+    cv::getPerspectiveTransform( s, d ).convertTo( M, M.depth() );
+
     // Invert the perspective matrix
-    invert(mat,mat);
+    invert(M,M);
+}
+
+typedef testing::TestWithParam<cv::Size> WarpPerspective2Plane;
+
+TEST_P(WarpPerspective2Plane, accuracy)
+{
+    cv::Size dstSize = GetParam();
+    cv::Mat src = imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    EXPECT_FALSE(src.empty());
+
+    cv::Mat dst1, dst2, matrix, ref1, ref2;
+    matrix.create(3, 3, CV_32FC1);
 
-    cv::fastcv::warpPerspective2Plane(src, src, dst1, dst2, mat, dstSize);
-    cv::warpPerspective(src,ref1,mat,dstSize,(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP));
-    cv::warpPerspective(src,ref2,mat,dstSize,(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP));
+    getInvertMatrix(src, dstSize, matrix);
 
-    cv::Mat difference1, difference2, mask1,mask2;
+    cv::fastcv::warpPerspective2Plane(src, src, dst1, dst2, matrix, dstSize);
+    cv::warpPerspective(src, ref1, matrix, dstSize, (cv::INTER_LINEAR | cv::WARP_INVERSE_MAP),cv::BORDER_CONSTANT,Scalar(0));
+    cv::warpPerspective(src, ref2, matrix, dstSize, (cv::INTER_LINEAR | cv::WARP_INVERSE_MAP),cv::BORDER_CONSTANT,Scalar(0));
+
+    cv::Mat difference1, difference2, mask1, mask2;
     cv::absdiff(dst1, ref1, difference1);
     cv::absdiff(dst2, ref2, difference2);
+
+    // Pixel values may differ slightly (typically by 1 or 2) because the algorithms differ; differences of at most 5 are ignored
     cv::threshold(difference1, mask1, 5, 255, cv::THRESH_BINARY);
     cv::threshold(difference2, mask2, 5, 255, cv::THRESH_BINARY);
     int num_diff_pixels_1 = cv::countNonZero(mask1);
     int num_diff_pixels_2 = cv::countNonZero(mask2);
 
-    EXPECT_LT(num_diff_pixels_1, src.size().area()*0.02);
-    EXPECT_LT(num_diff_pixels_2, src.size().area()*0.02);
+    // The remaining differences are expected along the border, so the allowed count scales with width+height
+    EXPECT_LT(num_diff_pixels_1, (dstSize.width+dstSize.height)*5);
+    EXPECT_LT(num_diff_pixels_2, (dstSize.width+dstSize.height)*5);
+}
+
+typedef testing::TestWithParam<tuple<Size, int, int>> WarpPerspective;
+
+TEST_P(WarpPerspective, accuracy)
+{
+    cv::Size dstSize = get<0>(GetParam());
+    int interplation = get<1>(GetParam());
+    int borderType   = get<2>(GetParam());
+    cv::Scalar borderValue = Scalar::all(100);
+
+    cv::Mat src = imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    EXPECT_FALSE(src.empty());
+
+    cv::Mat dst, matrix, ref;
+    matrix.create(3, 3, CV_32FC1);
+
+    getInvertMatrix(src, dstSize, matrix);
+
+    cv::fastcv::warpPerspective(src, dst, matrix, dstSize, interplation, borderType, borderValue);
+    cv::warpPerspective(src, ref, matrix, dstSize, (interplation | cv::WARP_INVERSE_MAP), borderType, borderValue);
+
+    cv::Mat difference, mask;
+    cv::absdiff(dst, ref, difference);
+    cv::threshold(difference, mask, 10, 255, cv::THRESH_BINARY);
+    int num_diff_pixels = cv::countNonZero(mask);
+
+    EXPECT_LT(num_diff_pixels, src.size().area()*0.05);
 }
 
+INSTANTIATE_TEST_CASE_P(FastCV_Extension, WarpPerspective,Combine(
+                   ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p),
+                   ::testing::Values(INTER_NEAREST, INTER_LINEAR, INTER_AREA),
+                   ::testing::Values(BORDER_CONSTANT, BORDER_REPLICATE, BORDER_TRANSPARENT)
+));
 INSTANTIATE_TEST_CASE_P(FastCV_Extension, WarpPerspective2Plane, Values(perf::szVGA, perf::sz720p, perf::sz1080p));
 
 }

From 68c8cf83b7d8e1841b5340cb2bf2b8f27f1c4fe8 Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@google.com>
Date: Tue, 20 May 2025 23:06:22 +0200
Subject: [PATCH 2/4] Properly fix inf/inf issue in MCC

Properly fix https://github.com/opencv/opencv_contrib/issues/3316

Without this fix, we could end up with 4 corners, which might seem
valid, while some of those are (0,0).
---
 modules/mcc/src/bound_min.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/mcc/src/bound_min.cpp b/modules/mcc/src/bound_min.cpp
index c2bc6e6c82c..bb2d8b428b7 100644
--- a/modules/mcc/src/bound_min.cpp
+++ b/modules/mcc/src/bound_min.cpp
@@ -167,7 +167,7 @@ void CBoundMin::calculate()
         j = (i + 1) % 4;
         Vcart = lines[i].cross(lines[j]);
         if (fabs(Vcart.z) <= 1e-6){
-            continue;
+            return;
         }
         Vhom.x = Vcart.x / Vcart.z;
         Vhom.y = Vcart.y / Vcart.z;

From 6e8ce301e0f90e368552c5aca38be256cc66181f Mon Sep 17 00:00:00 2001
From: Aakash Preetam <quic_apreetam@quicinc.com>
Date: Fri, 6 Jun 2025 15:00:12 +0530
Subject: [PATCH 3/4] Merge pull request #3931 from CodeLinaro:apreetam_5thPost

Add FastCV DSP Initialization, QcAllocator and FastCV DSP Extension APIs #3931

Merge with https://github.com/opencv/opencv/pull/27290

**Detailed Description**

This PR introduces FastCV DSP Extension APIs within the '**cv::fastcv::dsp**' namespace.
The following APIs have been added:

1. **fcvdspinit**: Initializes the FastCV DSP environment.
2. **fcvdspdeinit**: Deinitializes the FastCV DSP environment.
3. **sumOfAbsoluteDiffs**: Computes the sum of absolute differences of an image against an 8x8 template.
4. **thresholdOtsu**: Binarizes a grayscale image using Otsu's method.
5. **FFT**: Computes the 1D or 2D Fast Fourier Transform of a real-valued matrix.
6. **IFFT**: Computes the 1D or 2D Inverse Fast Fourier Transform of a complex-valued matrix.
7. **Canny**: Applies the Canny edge detector to an 8-bit grayscale image.
8. **filter2D**: Applies a generic 2D filter to an image.

The **QcAllocator** has been added to manage memory allocations on Qualcomm's Chipsets. This allocator ensures that matrices are allocated using the Qualcomm hardware memory allocator, providing efficient DSP operations.

Requires updated binary from: https://github.com/opencv/opencv_3rdparty/pull/97
Requires binary from https://github.com/opencv/opencv_3rdparty/pull/95

Lib Hash Update: https://github.com/opencv/opencv/pull/27403

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 modules/fastcv/include/opencv2/fastcv.hpp     |   7 ++
 .../include/opencv2/fastcv/allocator.hpp      |  64 ++++++++++
 .../include/opencv2/fastcv/blur_dsp.hpp       |  33 ++++++
 .../include/opencv2/fastcv/dsp_init.hpp       |  49 ++++++++
 .../include/opencv2/fastcv/edges_dsp.hpp      |  38 ++++++
 .../fastcv/include/opencv2/fastcv/fft_dsp.hpp |  49 ++++++++
 .../fastcv/include/opencv2/fastcv/sad_dsp.hpp |  34 ++++++
 .../include/opencv2/fastcv/thresh_dsp.hpp     |  39 ++++++
 modules/fastcv/perf/perf_blur_dsp.cpp         |  73 ++++++++++++
 modules/fastcv/perf/perf_edges_dsp.cpp        |  56 +++++++++
 modules/fastcv/perf/perf_fft_dsp.cpp          |  85 +++++++++++++
 modules/fastcv/perf/perf_main.cpp             |   7 +-
 modules/fastcv/perf/perf_precomp.hpp          |   2 +
 modules/fastcv/perf/perf_sad_dsp.cpp          |  52 ++++++++
 modules/fastcv/perf/perf_thresh_dsp.cpp       |  52 ++++++++
 modules/fastcv/src/allocator.cpp              | 105 ++++++++++++++++
 modules/fastcv/src/blur_dsp.cpp               |  68 +++++++++++
 modules/fastcv/src/dsp_init.cpp               |  46 +++++++
 modules/fastcv/src/edges_dsp.cpp              |  63 ++++++++++
 modules/fastcv/src/fft_dsp.cpp                |  96 +++++++++++++++
 modules/fastcv/src/precomp.hpp                | 112 +++++++++++++++++-
 modules/fastcv/src/sad_dsp.cpp                |  46 +++++++
 modules/fastcv/src/thresh_dsp.cpp             |  55 +++++++++
 modules/fastcv/test/test_blur_dsp.cpp         |  75 ++++++++++++
 modules/fastcv/test/test_edges_dsp.cpp        |  39 ++++++
 modules/fastcv/test/test_fft_dsp.cpp          |  98 +++++++++++++++
 modules/fastcv/test/test_main.cpp             |   7 +-
 modules/fastcv/test/test_precomp.hpp          |   2 +
 modules/fastcv/test/test_sad_dsp.cpp          |  44 +++++++
 modules/fastcv/test/test_thresh_dsp.cpp       |  92 ++++++++++++++
 30 files changed, 1584 insertions(+), 4 deletions(-)
 create mode 100644 modules/fastcv/include/opencv2/fastcv/allocator.hpp
 create mode 100644 modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp
 create mode 100644 modules/fastcv/include/opencv2/fastcv/dsp_init.hpp
 create mode 100644 modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp
 create mode 100644 modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp
 create mode 100644 modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp
 create mode 100644 modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp
 create mode 100644 modules/fastcv/perf/perf_blur_dsp.cpp
 create mode 100644 modules/fastcv/perf/perf_edges_dsp.cpp
 create mode 100644 modules/fastcv/perf/perf_fft_dsp.cpp
 create mode 100644 modules/fastcv/perf/perf_sad_dsp.cpp
 create mode 100644 modules/fastcv/perf/perf_thresh_dsp.cpp
 create mode 100644 modules/fastcv/src/allocator.cpp
 create mode 100644 modules/fastcv/src/blur_dsp.cpp
 create mode 100644 modules/fastcv/src/dsp_init.cpp
 create mode 100644 modules/fastcv/src/edges_dsp.cpp
 create mode 100644 modules/fastcv/src/fft_dsp.cpp
 create mode 100644 modules/fastcv/src/sad_dsp.cpp
 create mode 100644 modules/fastcv/src/thresh_dsp.cpp
 create mode 100644 modules/fastcv/test/test_blur_dsp.cpp
 create mode 100644 modules/fastcv/test/test_edges_dsp.cpp
 create mode 100644 modules/fastcv/test/test_fft_dsp.cpp
 create mode 100644 modules/fastcv/test/test_sad_dsp.cpp
 create mode 100644 modules/fastcv/test/test_thresh_dsp.cpp

diff --git a/modules/fastcv/include/opencv2/fastcv.hpp b/modules/fastcv/include/opencv2/fastcv.hpp
index 292e83a2dc3..6626c4c9b5a 100644
--- a/modules/fastcv/include/opencv2/fastcv.hpp
+++ b/modules/fastcv/include/opencv2/fastcv.hpp
@@ -30,6 +30,13 @@
 #include "opencv2/fastcv/thresh.hpp"
 #include "opencv2/fastcv/tracking.hpp"
 #include "opencv2/fastcv/warp.hpp"
+#include "opencv2/fastcv/allocator.hpp"
+#include "opencv2/fastcv/dsp_init.hpp"
+#include "opencv2/fastcv/sad_dsp.hpp"
+#include "opencv2/fastcv/thresh_dsp.hpp"
+#include "opencv2/fastcv/fft_dsp.hpp"
+#include "opencv2/fastcv/edges_dsp.hpp"
+#include "opencv2/fastcv/blur_dsp.hpp"
 
 /**
  * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions
diff --git a/modules/fastcv/include/opencv2/fastcv/allocator.hpp b/modules/fastcv/include/opencv2/fastcv/allocator.hpp
new file mode 100644
index 00000000000..a70666723ca
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/allocator.hpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_ALLOCATOR_HPP
+#define OPENCV_FASTCV_ALLOCATOR_HPP
+
+#include <opencv2/core.hpp>
+#include <set>
+#include <mutex>
+
+namespace cv {
+namespace fastcv {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Resource manager for FastCV allocations.
+ * This class manages active allocations.
+ */
+class QcResourceManager {
+public:
+    static QcResourceManager& getInstance();
+
+    void addAllocation(void* ptr);
+    void removeAllocation(void* ptr);
+
+private:
+    QcResourceManager() = default;
+    std::set<void*> activeAllocations;
+    std::mutex resourceMutex;
+};
+
+/**
+ * @brief Qualcomm's custom allocator.
+ * This allocator uses Qualcomm's memory management functions.
+ */
+class QcAllocator : public cv::MatAllocator {
+    public:
+        QcAllocator();
+        ~QcAllocator();
+    
+        cv::UMatData* allocate(int dims, const int* sizes, int type, void* data0, size_t* step, cv::AccessFlag flags, cv::UMatUsageFlags usageFlags) const CV_OVERRIDE;
+        bool allocate(cv::UMatData* u, cv::AccessFlag accessFlags, cv::UMatUsageFlags usageFlags) const CV_OVERRIDE;
+        void deallocate(cv::UMatData* u) const CV_OVERRIDE;
+};
+
+/**
+ * @brief Gets the default Qualcomm's allocator.
+ * This function returns a pointer to the default Qualcomm's allocator, which is optimized
+ * for use with DSP.
+ *
+ * @return Pointer to the default Qualcomm allocator.
+ */
+CV_EXPORTS cv::MatAllocator* getQcAllocator();
+
+//! @}
+
+} // namespace fastcv
+} // namespace cv
+
+#endif // OPENCV_FASTCV_ALLOCATOR_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp
new file mode 100644
index 00000000000..1228bdde458
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_BLUR_DSP_HPP
+#define OPENCV_FASTCV_BLUR_DSP_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Filter an image with non-separable kernel
+ * @param _src Input image with type CV_8UC1, src size should be greater than 176*144
+ * @param _dst Output image with type CV_8UC1, CV_16SC1 or CV_32FC1
+ * @param ddepth The depth of output image
+ * @param _kernel Filter kernel data
+ */
+CV_EXPORTS void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel);
+
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_BLUR_DSP_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp b/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp
new file mode 100644
index 00000000000..942b7fdaa95
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_DSP_INIT_HPP
+#define OPENCV_FASTCV_DSP_INIT_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Initializes the FastCV DSP environment.
+ * 
+ * This function sets up the necessary environment and resources for the DSP to operate.
+ * It must be called once at the very beginning of the use case or program to ensure that 
+ * the DSP is properly initialized before any DSP-related operations are performed.
+ *
+ * @note This function must be called at the start of the use case or program, before any 
+ *       DSP-related operations.
+ * 
+ * @return int Returns 0 on success, and a non-zero value on failure.
+ */
+CV_EXPORTS int fcvdspinit();
+
+/**
+ * @brief Deinitializes the FastCV DSP environment.
+ * 
+ * This function releases the resources and environment set up by the 'fcvdspinit' function.
+ * It should be called before the use case or program exits to ensure that all DSP resources 
+ * are properly cleaned up and no memory leaks occur.
+ *
+ * @note This function must be called at the end of the use case or program, after all DSP-related 
+ *       operations are complete.
+ */
+CV_EXPORTS void fcvdspdeinit();
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_DSP_INIT_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp
new file mode 100644
index 00000000000..37b2aef5515
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_EDGES_DSP_HPP
+#define OPENCV_FASTCV_EDGES_DSP_HPP
+
+#include "opencv2/core/mat.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+/**
+* @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions
+*/
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Canny edge detector applied to an 8-bit grayscale image
+ * @param _src          Input image with type CV_8UC1, allocated with cv::fastcv::getQcAllocator()
+ * @param _dst          Output 8-bit image containing the edge detection results
+ * @param lowThreshold  First threshold, must not exceed highThreshold
+ * @param highThreshold Second threshold
+ * @param apertureSize  The Sobel kernel size for calculating gradient. Supported sizes are 3, 5 and 7.
+ * @param L2gradient    true selects the L2 gradient norm, false the L1 norm
+*/
+CV_EXPORTS void Canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize = 3, bool L2gradient = false);
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif //OPENCV_FASTCV_EDGES_DSP_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp
new file mode 100644
index 00000000000..b4e4e44ecdc
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_FFT_DSP_HPP
+#define OPENCV_FASTCV_FFT_DSP_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+* @brief Computes the 1D or 2D Fast Fourier Transform of a real valued matrix.
+        For the 2D case, the width and height of the input and output matrix must be powers of 2.
+        For the 1D case, the height of the matrices must be 1, while the width must be a power of 2.
+        Input and output matrices must be allocated with cv::fastcv::getQcAllocator().
+* @param src Input array of CV_8UC1. The dimensions of the matrix must be powers of 2 for the 2D case,
+            and in the 1D case, the height must be 1, while the width must be a power of 2.
+* @param dst The computed FFT matrix of type CV_32FC2. The FFT Re and Im coefficients are stored in different channels.
+            Hence the dimensions of the dst are (srcWidth, srcHeight)
+*/
+CV_EXPORTS void FFT(InputArray src, OutputArray dst);
+
+/**
+* @brief Computes the 1D or 2D Inverse Fast Fourier Transform of a complex valued matrix.
+        For the 2D case, the width and height of the input and output matrix must be powers of 2.
+        For the 1D case, the height of the matrices must be 1, while the width must be a power of 2.
+        Input and output matrices must be allocated with cv::fastcv::getQcAllocator().
+* @param src Input array of type CV_32FC2 containing FFT Re and Im coefficients stored in separate channels.
+            The dimensions of the matrix must be powers of 2 for the 2D case, and in the 1D case, the height must be 1,
+            while the width must be a power of 2.
+* @param dst The computed IFFT matrix of type CV_8U. The matrix is real valued and has no imaginary components.
+            Hence the dimensions of the dst are (srcWidth, srcHeight)
+*/
+CV_EXPORTS void IFFT(InputArray src, OutputArray dst);
+
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_FFT_DSP_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp
new file mode 100644
index 00000000000..b9ae9079686
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_SAD_DSP_HPP
+#define OPENCV_FASTCV_SAD_DSP_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+/**
+ * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions
+ */
+
+//! @addtogroup fastcv
+//! @{
+/**
+ * @brief Sum of absolute differences of an image against an 8x8 template.
+ * @param _patch The 8x8 template the image is compared against, type CV_8UC1
+ * @param _src The input image data, type CV_8UC1
+ * @param _dst The output image data, type CV_16UC1
+*/
+CV_EXPORTS void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst);
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_SAD_DSP_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp
new file mode 100644
index 00000000000..25824e72097
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_THRESH_DSP_HPP
+#define OPENCV_FASTCV_THRESH_DSP_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Binarizes a grayscale image using Otsu's method.
+ *        Sets the pixel to max(255) if its value is greater than the threshold;
+ *        else, sets the pixel to min(0). The threshold is chosen so that it minimizes
+ *        the intra-class variance (the variance within the class).
+ *
+ * @param _src Input 8-bit grayscale image. Size of buffer is srcStride*srcHeight bytes.
+ * @param _dst Output 8-bit binarized image. Size of buffer is dstStride*srcHeight bytes.
+ * @param type Threshold type that can be either 0 (false) or 1 (true).
+ *             NOTE: For threshold type=0, the pixel is set as
+ *             maxValue if its value is greater than the threshold; else, it is set as zero.
+ *             For threshold type=1, the pixel is set as zero if its
+ *             value is greater than the threshold; else, it is set as maxValue.
+ */
+CV_EXPORTS void thresholdOtsu(InputArray _src, OutputArray _dst, bool type);
+
+//! @}
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_THRESH_DSP_HPP
\ No newline at end of file
diff --git a/modules/fastcv/perf/perf_blur_dsp.cpp b/modules/fastcv/perf/perf_blur_dsp.cpp
new file mode 100644
index 00000000000..133ba85cd56
--- /dev/null
+++ b/modules/fastcv/perf/perf_blur_dsp.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+// Perf test for cv::fastcv::dsp::filter2D.
+// Parameters: image size, destination depth, kernel size.
+typedef perf::TestBaseWithParam<tuple<Size, int, int>> Filter2DPerfTest_DSP;
+
+PERF_TEST_P(Filter2DPerfTest_DSP, run,
+    ::testing::Combine(::testing::Values(perf::szVGA, perf::sz720p),                // image size
+                       ::testing::Values(CV_8U,CV_16S,CV_32F),                      // dst image depth
+                       ::testing::Values(3, 5, 7)                                   // kernel size
+                       )
+           )
+{
+    // Tag the test with the FastCV DSP skip tag.
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // Bring the DSP up first; the test stops here when that fails.
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const auto params = GetParam();
+    const cv::Size imageSize = get<0>(params);
+    const int dstDepth = get<1>(params);
+    const int kernelDim = get<2>(params);
+
+    // Input, kernel and output all use the FastCV allocator.
+    cv::Mat image;
+    image.allocator = cv::fastcv::getQcAllocator();
+    image.create(imageSize, CV_8U);
+
+    cv::Mat coeffs, filtered;
+    coeffs.allocator = cv::fastcv::getQcAllocator();
+    filtered.allocator = cv::fastcv::getQcAllocator();
+
+    // 8U/16S outputs get a signed 8-bit kernel, 32F output a float kernel;
+    // for any other depth the kernel is left unallocated.
+    if (dstDepth == CV_8U || dstDepth == CV_16S)
+    {
+        coeffs.create(kernelDim, kernelDim, CV_8S);
+    }
+    else if (dstDepth == CV_32F)
+    {
+        coeffs.create(kernelDim, kernelDim, CV_32F);
+    }
+
+    // Random content; the image is filled twice, once by each RNG helper.
+    cv::randu(image, 0, 256);
+    cv::randu(coeffs, INT8_MIN, INT8_MAX);
+    RNG& generator = cv::theRNG();
+    cvtest::randUni(generator, image, Scalar::all(0), Scalar::all(255));
+
+    // Only the filter call itself is timed.
+    for (; next(); )
+    {
+        startTimer();
+        cv::fastcv::dsp::filter2D(image, filtered, dstDepth, coeffs);
+        stopTimer();
+    }
+
+    // Release the DSP again.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
diff --git a/modules/fastcv/perf/perf_edges_dsp.cpp b/modules/fastcv/perf/perf_edges_dsp.cpp
new file mode 100644
index 00000000000..02f6e570ab8
--- /dev/null
+++ b/modules/fastcv/perf/perf_edges_dsp.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+// Parameters: image size, aperture size, (low, high) thresholds, L2gradient flag.
+typedef perf::TestBaseWithParam<tuple<Size, int, pair<int, int>, bool>> CannyPerfTest;
+
+PERF_TEST_P(CannyPerfTest, run,
+    ::testing::Combine(::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // image size
+        ::testing::Values(3, 5, 7), // aperture size
+        ::testing::Values(make_pair(0, 50), make_pair(100, 150), make_pair(50, 150)), // low and high thresholds
+        ::testing::Values(false, true) // L2gradient
+    )
+)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be up before the Canny kernel can run on it.
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const auto params = GetParam();
+    const cv::Size imageSize = get<0>(params);
+    const int sobelAperture = get<1>(params);
+    const int lowerBound = get<2>(params).first;
+    const int upperBound = get<2>(params).second;
+    const bool useL2Norm = get<3>(params);
+
+    // Source and destination both use the FastCV allocator.
+    cv::Mat image, edges;
+    image.allocator = cv::fastcv::getQcAllocator();
+    edges.allocator = cv::fastcv::getQcAllocator();
+    image.create(imageSize, CV_8UC1);
+
+    cv::randu(image, 0, 256);
+
+    // Only the Canny call is timed.
+    for (; next(); )
+    {
+        startTimer();
+        cv::fastcv::dsp::Canny(image, edges, lowerBound, upperBound, sobelAperture, useL2Norm);
+        stopTimer();
+    }
+
+    // Release the DSP again.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+} //namespace
diff --git a/modules/fastcv/perf/perf_fft_dsp.cpp b/modules/fastcv/perf/perf_fft_dsp.cpp
new file mode 100644
index 00000000000..468a92aa4ac
--- /dev/null
+++ b/modules/fastcv/perf/perf_fft_dsp.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef perf::TestBaseWithParam<cv::Size> FFT_DSPExtPerfTest;
+
+// Times the forward transform of a random 8-bit image.
+PERF_TEST_P_(FFT_DSPExtPerfTest, forward)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be up before FFT can run on it.
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const Size imageSize = GetParam();
+    // FastCV-allocated input filled with random values.
+    Mat image;
+    image.allocator = cv::fastcv::getQcAllocator();
+    image.create(imageSize, CV_8UC1);
+    RNG& generator = cv::theRNG();
+    cvtest::randUni(generator, image, Scalar::all(0), Scalar::all(256));
+
+    Mat spectrum;
+    spectrum.allocator = cv::fastcv::getQcAllocator();
+
+    for (; next(); )
+    {
+        startTimer();
+        cv::fastcv::dsp::FFT(image, spectrum);
+        stopTimer();
+    }
+
+    // Release the DSP again.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P_(FFT_DSPExtPerfTest, inverse)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be up before FFT/IFFT can run on it.
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const Size imageSize = GetParam();
+    // FastCV-allocated input filled with random values.
+    Mat image;
+    image.allocator = cv::fastcv::getQcAllocator();
+    image.create(imageSize, CV_8UC1);
+    RNG& generator = cv::theRNG();
+    cvtest::randUni(generator, image, Scalar::all(0), Scalar::all(256));
+
+    // The spectrum fed to IFFT is computed once, outside the timed loop.
+    Mat spectrum;
+    spectrum.allocator = cv::fastcv::getQcAllocator();
+    cv::fastcv::dsp::FFT(image, spectrum);
+
+    Mat restored;
+    restored.allocator = cv::fastcv::getQcAllocator();
+
+    for (; next(); )
+    {
+        startTimer();
+        cv::fastcv::dsp::IFFT(spectrum, restored);
+        stopTimer();
+    }
+
+    // Release the DSP again.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+INSTANTIATE_TEST_CASE_P(FastCV_Extension, FFT_DSPExtPerfTest,
+    ::testing::Values(Size(256, 256), Size(512, 512))); // square power-of-two sizes for the forward/inverse tests
+
+} // namespace
diff --git a/modules/fastcv/perf/perf_main.cpp b/modules/fastcv/perf/perf_main.cpp
index a6824dfb007..b43a0a3d84b 100644
--- a/modules/fastcv/perf/perf_main.cpp
+++ b/modules/fastcv/perf/perf_main.cpp
@@ -5,4 +5,9 @@
 
 #include "perf_precomp.hpp"
 
-CV_PERF_TEST_MAIN(imgproc)
+static void initFastCVTests() // passed to CV_PERF_TEST_MAIN below as the test-main init hook
+{
+    cvtest::registerGlobalSkipTag(CV_TEST_TAG_FASTCV_SKIP_DSP); // register the DSP tag as a global skip tag
+}
+
+CV_PERF_TEST_MAIN(imgproc, initFastCVTests())
diff --git a/modules/fastcv/perf/perf_precomp.hpp b/modules/fastcv/perf/perf_precomp.hpp
index e052a0098e2..0a229f70e08 100644
--- a/modules/fastcv/perf/perf_precomp.hpp
+++ b/modules/fastcv/perf/perf_precomp.hpp
@@ -14,4 +14,6 @@ namespace opencv_test {
 using namespace perf;
 } // namespace
 
+#define CV_TEST_TAG_FASTCV_SKIP_DSP "fastcv_skip_dsp" // tag applied by the DSP perf tests via applyTestTag()
+
 #endif
diff --git a/modules/fastcv/perf/perf_sad_dsp.cpp b/modules/fastcv/perf/perf_sad_dsp.cpp
new file mode 100644
index 00000000000..0acd730efd4
--- /dev/null
+++ b/modules/fastcv/perf/perf_sad_dsp.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef std::tuple<cv::Size /*srcSize*/> SumOfAbsDiffsPerfParams;
+typedef perf::TestBaseWithParam<SumOfAbsDiffsPerfParams> SumOfAbsDiffsPerfTest;
+
+PERF_TEST_P(SumOfAbsDiffsPerfTest, run,
+    ::testing::Values(cv::Size(640, 480),  // VGA
+        cv::Size(1280, 720),               // 720p
+        cv::Size(1920, 1080))              // 1080p
+)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // Initialize FastCV DSP
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    cv::Size srcSize = std::get<0>(GetParam());
+
+    // patch, src and dst all use the FastCV allocator
+    cv::Mat patch, src, dst;
+    patch.allocator = cv::fastcv::getQcAllocator();
+    src.allocator = cv::fastcv::getQcAllocator();
+    dst.allocator = cv::fastcv::getQcAllocator();
+
+    patch.create(8, 8, CV_8UC1);
+    src.create(srcSize, CV_8UC1);
+    RNG& rng = cv::theRNG();
+    cvtest::randUni(rng, patch, cv::Scalar::all(0), cv::Scalar::all(255));
+    cvtest::randUni(rng, src, cv::Scalar::all(0), cv::Scalar::all(255));
+
+    while(next())
+    {
+        startTimer();
+        cv::fastcv::dsp::sumOfAbsoluteDiffs(patch, src, dst);
+        stopTimer();
+    }
+
+    // De-Initialize DSP, matching the fcvdspinit() above like the other DSP perf tests do
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
diff --git a/modules/fastcv/perf/perf_thresh_dsp.cpp b/modules/fastcv/perf/perf_thresh_dsp.cpp
new file mode 100644
index 00000000000..452b9464db1
--- /dev/null
+++ b/modules/fastcv/perf/perf_thresh_dsp.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef std::tuple<cv::Size, bool /*type*/> ThresholdOtsuPerfParams;
+typedef perf::TestBaseWithParam<ThresholdOtsuPerfParams> ThresholdOtsuPerfTest;
+
+PERF_TEST_P(ThresholdOtsuPerfTest, run,
+    ::testing::Combine(::testing::Values(Size(320, 240), Size(640, 480), Size(1280, 720), Size(1920, 1080)),
+        ::testing::Values(false, true) // type
+    )
+)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be up before the threshold kernel can run on it.
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const ThresholdOtsuPerfParams params = GetParam();
+    const cv::Size imageSize = std::get<0>(params);
+    const bool thresholdType = std::get<1>(params);
+
+    // Source and destination both use the FastCV allocator.
+    cv::Mat image, binarized;
+    image.allocator = cv::fastcv::getQcAllocator();
+    binarized.allocator = cv::fastcv::getQcAllocator();
+    image.create(imageSize, CV_8UC1);
+
+    RNG& generator = cv::theRNG();
+    cvtest::randUni(generator, image, Scalar::all(0), Scalar::all(256));
+
+    // Only the threshold call is timed.
+    for (; next(); )
+    {
+        startTimer();
+        cv::fastcv::dsp::thresholdOtsu(image, binarized, thresholdType);
+        stopTimer();
+    }
+
+    // Release the DSP again.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
diff --git a/modules/fastcv/src/allocator.cpp b/modules/fastcv/src/allocator.cpp
new file mode 100644
index 00000000000..83147d2354a
--- /dev/null
+++ b/modules/fastcv/src/allocator.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+
+QcResourceManager& QcResourceManager::getInstance() {
+    static QcResourceManager sharedManager; // function-local static: the same object is returned on every call
+    return sharedManager;
+}
+
+void QcResourceManager::addAllocation(void* ptr) {
+    const std::lock_guard<std::mutex> guard(resourceMutex); // held for both the insert and the log line
+    activeAllocations.insert(ptr);
+    CV_LOG_DEBUG(NULL, cv::format("Active Allocations: %zu", activeAllocations.size()));
+}
+
+void QcResourceManager::removeAllocation(void* ptr) {
+    const std::lock_guard<std::mutex> guard(resourceMutex); // held for both the erase and the log line
+    activeAllocations.erase(ptr);
+    CV_LOG_DEBUG(NULL, cv::format("Active Allocations: %zu", activeAllocations.size()));
+}
+
+QcAllocator::QcAllocator() // empty body: no work is done at construction
+{
+}
+
+QcAllocator::~QcAllocator() // empty body: no work is done at destruction
+{
+}
+
+// Sizes the buffer (filling in step[]), takes it from fcvHwMemAlloc unless data0 is given, and wraps it in a UMatData.
+cv::UMatData* QcAllocator::allocate(int dims, const int* sizes, int type,
+                    void* data0, size_t* step, cv::AccessFlag flags,
+                    cv::UMatUsageFlags usageFlags) const
+{
+    CV_UNUSED(flags);
+    CV_UNUSED(usageFlags);
+    size_t total = CV_ELEM_SIZE(type);
+    for( int i = dims-1; i >= 0; i-- )
+    {
+        if( step )
+        {
+            if( data0 && step[i] != CV_AUTOSTEP )
+            {
+                CV_Assert(total <= step[i]);
+                total = step[i];
+            }
+            else
+                step[i] = total;
+        }
+        total *= sizes[i];
+    }
+    uchar* data = data0 ? (uchar*)data0 : (uchar*)fcvHwMemAlloc(total, 16);
+    if (!data) // allocation failed: report it rather than wrap a null buffer
+        CV_Error(cv::Error::StsNoMem, "FastCV hardware memory allocation failed");
+    cv::UMatData* u = new cv::UMatData(this);
+    u->data = u->origdata = data;
+    u->size = total;
+    if (data0)
+        u->flags |= cv::UMatData::USER_ALLOCATED;
+    else // track only buffers allocated here; deallocate() untracks only those
+        cv::fastcv::QcResourceManager::getInstance().addAllocation(data);
+    return u;
+}
+
+bool QcAllocator::allocate(cv::UMatData* u, cv::AccessFlag accessFlags, cv::UMatUsageFlags usageFlags) const
+{
+    // Nothing is allocated here; the result only says whether a descriptor was passed in.
+    CV_UNUSED(usageFlags);
+    CV_UNUSED(accessFlags);
+    const bool hasDescriptor = (u != nullptr);
+    return hasDescriptor;
+}
+
+void QcAllocator::deallocate(cv::UMatData* u) const
+{
+    if (u == nullptr)
+        return;
+    // The descriptor may only go away once nothing references it any more.
+    CV_Assert(u->urefcount == 0);
+    CV_Assert(u->refcount == 0);
+    const bool ownsBuffer = !(u->flags & cv::UMatData::USER_ALLOCATED);
+    if (ownsBuffer)
+    {
+        // Not a user buffer: free it and drop it from the tracked allocations.
+        uchar* const buffer = u->origdata;
+        fcvHwMemFree(buffer);
+        cv::fastcv::QcResourceManager::getInstance().removeAllocation(buffer);
+        u->origdata = 0;
+    }
+    delete u;
+}
+
+cv::MatAllocator* getQcAllocator()
+{
+    // Created on the first call and never deleted; every call returns the same pointer.
+    static cv::MatAllocator* const sharedAllocator = new QcAllocator;
+    return sharedAllocator;
+}
+
+}
+}
diff --git a/modules/fastcv/src/blur_dsp.cpp b/modules/fastcv/src/blur_dsp.cpp
new file mode 100644
index 00000000000..b6147b54ba3
--- /dev/null
+++ b/modules/fastcv/src/blur_dsp.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+// DSP 2D correlation of a CV_8UC1 image with a square, odd-sized kernel: CV_8SC1 kernel for CV_8U/CV_16S output, CV_32FC1 for CV_32F.
+void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel)
+{
+    CV_Assert(
+        !_src.empty() &&
+        _src.type() == CV_8UC1 &&
+        IS_FASTCV_ALLOCATED(_src.getMat()) &&
+        IS_FASTCV_ALLOCATED(_kernel.getMat())
+    );
+    Mat kernel = _kernel.getMat();
+    Size ksize = kernel.size();
+    CV_Assert(ksize.width == ksize.height);
+    CV_Assert(ksize.width % 2 == 1);
+    CV_Assert(kernel.isContinuous()); // kernel.data is handed to FastCV as one flat array
+
+    _dst.create(_src.size(), ddepth);
+    Mat src = _src.getMat();
+    Mat dst = _dst.getMat();
+    CV_Assert(IS_FASTCV_ALLOCATED(dst)); // dst must come from the QcAllocator as well
+
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+
+    switch (ddepth)
+    {
+        case CV_8U:
+        {
+            CV_Assert(kernel.type() == CV_8SC1); // kernel.data is read below as int8_t
+            if(ksize.width == 3)
+                fcvFilterCorr3x3s8_v2Q((int8_t*)kernel.data, src.data, src.cols, src.rows, src.step, dst.data, dst.step);
+            else
+                fcvFilterCorrNxNu8Q((int8_t*)kernel.data, ksize.width, 0, src.data, src.cols, src.rows, src.step, dst.data, dst.step);
+            break;
+        }
+        case CV_16S:
+        {
+            CV_Assert(kernel.type() == CV_8SC1); // kernel.data is read below as int8_t
+            fcvFilterCorrNxNu8s16Q((int8_t*)kernel.data, ksize.width, 0, src.data, src.cols, src.rows, src.step, (int16_t*)dst.data, dst.step);
+            break;
+        }
+        case CV_32F:
+        {
+            CV_Assert(kernel.type() == CV_32FC1); // kernel.data is read below as float32_t
+            fcvFilterCorrNxNu8f32Q((float32_t*)kernel.data, ksize.width, src.data, src.cols, src.rows, src.step, (float32_t*)dst.data, dst.step);
+            break;
+        }
+        default:
+        {
+            CV_Error(cv::Error::StsBadArg, cv::format("Kernel Size:%d, Dst type:%s is not supported", ksize.width,
+                depthToString(ddepth)));
+        }
+    }
+}
+
+} // dsp::
+} // fastcv::
+} // cv::
\ No newline at end of file
diff --git a/modules/fastcv/src/dsp_init.cpp b/modules/fastcv/src/dsp_init.cpp
new file mode 100644
index 00000000000..ee0bff8ba1d
--- /dev/null
+++ b/modules/fastcv/src/dsp_init.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+//CHANGE FASTCV Q6 INIT
+int fcvdspinit()
+{
+    // Returns 0 when the DSP is up (already or newly initialized), -1 when initialization fails.
+    FastCvDspContext& dspContext = FastCvDspContext::getContext();
+    if (dspContext.isInitialized()) {
+        CV_LOG_INFO(NULL, "FastCV DSP already initialized, skipping initialization");
+        return 0;
+    }
+    if (dspContext.initialize()) {
+        CV_LOG_INFO(NULL, "FastCV DSP initialized successfully");
+        return 0;
+    }
+    CV_LOG_ERROR(NULL, "Failed to initialize FastCV DSP");
+    return -1;
+}
+
+void fcvdspdeinit()
+{
+    // Nothing to release when the DSP is not currently initialized.
+    FastCvDspContext& context = FastCvDspContext::getContext();
+    if (!context.isInitialized()) {
+        CV_LOG_INFO(NULL, "FastCV DSP already deinitialized, skipping deinitialization");
+        return;
+    }
+    if (!context.deinitialize()) {
+        CV_LOG_ERROR(NULL, "Failed to deinitialize FastCV DSP");
+        return; // do not fall through to the success message below
+    }
+    CV_LOG_INFO(NULL, "FastCV DSP deinitialized successfully");
+}
+
+
+} // namespace dsp
+} // namespace fastcv
+} // namespace cv
\ No newline at end of file
diff --git a/modules/fastcv/src/edges_dsp.cpp b/modules/fastcv/src/edges_dsp.cpp
new file mode 100644
index 00000000000..ea121e73d04
--- /dev/null
+++ b/modules/fastcv/src/edges_dsp.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+// Canny edge detection on the DSP via fcvFilterCannyu8Q; raises a cv::Exception on invalid input or FastCV failure.
+void Canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize, bool L2gradient)
+{
+    CV_Assert(
+        !_src.empty() &&
+        lowThreshold <= highThreshold &&
+        IS_FASTCV_ALLOCATED(_src.getMat())
+    );
+    int type = _src.type();
+    CV_Assert(type == CV_8UC1);
+    CV_Assert(_src.step() % 8 == 0);
+    Size size = _src.size();
+    _dst.create(size, type);
+    Mat src = _src.getMat();
+    CV_Assert(src.step >= (size_t)src.cols);
+    CV_Assert(reinterpret_cast<uintptr_t>(src.data) % 8 == 0);
+
+    Mat dst = _dst.getMat();
+    // Check if dst is allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(dst));
+    CV_Assert(reinterpret_cast<uintptr_t>(dst.data) % 8 == 0);
+    CV_Assert(dst.step >= (size_t)src.cols);
+
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+
+    const fcvNormType norm = L2gradient ? FASTCV_NORM_L2 : FASTCV_NORM_L1;
+    // gx/gy: one int16 per pixel, passed to FastCV together with gstride = 2 * cols
+    const size_t gradBytes = (size_t)src.cols * src.rows * sizeof(int16_t);
+    int16_t* gx = (int16_t*)fcvHwMemAlloc(gradBytes, 16);
+    int16_t* gy = (int16_t*)fcvHwMemAlloc(gradBytes, 16);
+    if (!gx || !gy)
+    {
+        if (gx) fcvHwMemFree(gx); // release whichever buffer was obtained
+        if (gy) fcvHwMemFree(gy);
+        CV_Error(cv::Error::StsNoMem, "Failed to allocate FastCV Canny scratch buffers");
+    }
+    uint32_t gstride = 2 * src.cols;
+    fcvStatus status = fcvFilterCannyu8Q((uint8_t*)src.data, src.cols, src.rows, src.step, apertureSize, lowThreshold, highThreshold, norm, (uint8_t*)dst.data, dst.step, gx, gy, gstride);
+    fcvHwMemFree(gx);
+    fcvHwMemFree(gy);
+
+    if (status != FASTCV_SUCCESS)
+    {
+        std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
+        CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
+    }
+}
+
+} // dsp::
+} // fastcv::
+} // cv::
\ No newline at end of file
diff --git a/modules/fastcv/src/fft_dsp.cpp b/modules/fastcv/src/fft_dsp.cpp
new file mode 100644
index 00000000000..f3fd07024ea
--- /dev/null
+++ b/modules/fastcv/src/fft_dsp.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+static bool isPow2(int x)
+{
+    return (x == 0) ? false : ((x & (x - 1)) == 0); // zero is rejected; otherwise x and x - 1 must share no set bit
+}
+
+void FFT(InputArray _src, OutputArray _dst) // forward FFT through fcvFFTu8Q: CV_8UC1 in, CV_32FC2 out
+{
+    CV_Assert(
+        !_src.empty() && 
+        _src.type() == CV_8UC1 && 
+        IS_FASTCV_ALLOCATED(_src.getMat())
+    );
+    // Power-of-two dimensions, stride multiple of 8, element count above MIN_REMOTE_BUF_SIZE
+    CV_Assert(isPow2(_src.rows()) || _src.rows() == 1);
+    CV_Assert(isPow2(_src.cols()));
+    CV_Assert(_src.step() % 8 == 0);
+    CV_Assert(static_cast<unsigned long>(_src.rows() * _src.cols()) > MIN_REMOTE_BUF_SIZE);
+
+    Mat src = _src.getMat();
+    CV_Assert(reinterpret_cast<uintptr_t>(src.data) % 8 == 0); // data pointer must be 8-byte aligned
+
+    _dst.create(_src.rows(), _src.cols(), CV_32FC2); // same rows/cols as src, two float channels
+    CV_Assert(_dst.step() % 8 == 0);
+    Mat dst = _dst.getMat();
+
+    // Check if dst is allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(dst));
+    CV_Assert(reinterpret_cast<uintptr_t>(dst.data) % 8 == 0);
+    
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+
+    fcvStatus status = fcvFFTu8Q(src.data, src.cols, src.rows, src.step,
+        (float*)dst.data, dst.step);
+    // Any status other than FASTCV_SUCCESS is raised as StsInternal with the status text
+    if (status != FASTCV_SUCCESS)
+    {
+        std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
+        CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
+    }
+}
+
+void IFFT(InputArray _src, OutputArray _dst) // inverse FFT through fcvIFFTf32Q: CV_32FC2 in, CV_8UC1 out
+{
+    CV_Assert(
+        !_src.empty() && 
+        _src.type() == CV_32FC2 &&
+        IS_FASTCV_ALLOCATED(_src.getMat())
+    );
+    // Power-of-two dimensions are required, as for the forward transform
+    CV_Assert(isPow2(_src.rows()) || _src.rows() == 1);
+    CV_Assert(isPow2(_src.cols()));
+
+    CV_Assert(_src.step() % 8 == 0);
+    CV_Assert(static_cast<unsigned long>(_src.rows() * _src.cols() * sizeof(float32_t)) > MIN_REMOTE_BUF_SIZE);
+
+    Mat src = _src.getMat();
+
+    CV_Assert(reinterpret_cast<uintptr_t>(src.data) % 8 == 0); // data pointer must be 8-byte aligned
+
+    _dst.create(_src.rows(), _src.cols(), CV_8UC1); // same rows/cols as src, single 8-bit channel
+
+    CV_Assert(_dst.step() % 8 == 0);
+
+    Mat dst = _dst.getMat();
+    // Check if dst is allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(dst));
+    CV_Assert(reinterpret_cast<uintptr_t>(dst.data) % 8 == 0);
+
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+
+    fcvStatus status = fcvIFFTf32Q((const float*)src.data, src.cols * 2, src.rows, src.step, // NOTE(review): width is passed as cols * 2, presumably counted in floats - confirm
+        dst.data, dst.step);
+    // Any status other than FASTCV_SUCCESS is raised as StsInternal with the status text
+    if (status != FASTCV_SUCCESS)
+    {
+        std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
+        CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
+    }
+}
+
+} // dsp::
+} // fastcv::
+} // cv::
\ No newline at end of file
diff --git a/modules/fastcv/src/precomp.hpp b/modules/fastcv/src/precomp.hpp
index c2929d76cc1..c5485eeff1a 100644
--- a/modules/fastcv/src/precomp.hpp
+++ b/modules/fastcv/src/precomp.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -10,11 +10,13 @@
 #include <opencv2/imgproc.hpp>
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/utils/logger.hpp"
-
+#include <opencv2/core/core_c.h>
 #include <opencv2/fastcv.hpp>
 #include <map>
+#include <atomic>
 
 #include "fastcv.h"
+#include "fastcvDsp.h"
 
 namespace cv {
 namespace fastcv {
@@ -30,6 +32,7 @@ namespace fastcv {
 
 #define FCV_KernelSize_SHIFT 3
 #define FCV_MAKETYPE(ksize,depth) ((ksize<<FCV_KernelSize_SHIFT) + depth)
+#define MIN_REMOTE_BUF_SIZE (176*144*sizeof(uint8_t)) // size threshold used by the DSP wrappers' buffer-size asserts; parenthesized so it is safe inside any expression
 
 const std::map<fcvStatus, std::string> fcvStatusStrings =
 {
@@ -72,6 +75,111 @@ struct FastCvContext
     bool isInitialized;
 };
 
+namespace dsp {
+    struct FastCvDspContext;
+
+    #define IS_FASTCV_ALLOCATED(mat) /* true, or raises StsBadArg naming the offending expression */ \
+    (((mat).allocator == cv::fastcv::getQcAllocator()) ? true : \
+        (CV_Error(cv::Error::StsBadArg, cv::format("Matrix '%s' not allocated with FastCV allocator. " \
+                                    "Please ensure that the matrix is created using " \
+                                    "cv::fastcv::getQcAllocator().", #mat)), false))
+    
+    #define FASTCV_CHECK_DSP_INIT() \
+    if (!FastCvDspContext::getContext().isInitialized() && \
+        fcvdspinit() != 0) \
+    { \
+        CV_Error(cv::Error::StsError, "Failed to initialize DSP"); \
+    }
+                                
+    struct FastCvDspContext // shared DSP state: wraps fcvQ6Init()/fcvQ6DeInit() behind a mutex and counts the calls
+    {
+    private:
+        mutable cv::Mutex initMutex;                      // taken by initialize() and deinitialize()
+        std::atomic<bool> isDspInitialized{false};        // set after fcvQ6Init() succeeds, cleared after fcvQ6DeInit()
+        std::atomic<uint64_t> initializationCount{0};     // incremented after each successful fcvQ6Init()
+        std::atomic<uint64_t> deInitializationCount{0};   // incremented after each fcvQ6DeInit()
+
+        static FastCvDspContext& getInstanceImpl() {
+            static FastCvDspContext context;
+            return context;
+        }
+    public:
+        static FastCvDspContext& getContext() {
+            return getInstanceImpl();
+        }
+
+        FastCvDspContext(const FastCvDspContext&) = delete;
+        FastCvDspContext& operator=(const FastCvDspContext&) = delete;
+
+        bool initialize() {
+            cv::AutoLock lock(initMutex);
+            // Returns true when the DSP is initialized on exit (already or newly), false when fcvQ6Init() fails.
+            if (isDspInitialized.load(std::memory_order_acquire)) {
+                CV_LOG_INFO(NULL, "FastCV DSP already initialized, skipping initialization");
+                return true;
+            }
+
+            CV_LOG_INFO(NULL, "Initializing FastCV DSP");
+
+            if (fcvQ6Init() == 0) {
+                isDspInitialized.store(true, std::memory_order_release);
+                initializationCount++;
+                CV_LOG_DEBUG(NULL, cv::format("FastCV DSP initialized (init count: %llu, deinit count: %llu)",
+                    (unsigned long long)initializationCount.load(), (unsigned long long)deInitializationCount.load()));
+
+                return true;
+            }
+            // fcvQ6Init() returned non-zero: the initialized flag stays false
+            CV_LOG_ERROR(NULL, "FastCV DSP initialization failed");
+            return false;
+        }
+
+        bool deinitialize() {
+            cv::AutoLock lock(initMutex);
+            // Returns true when the DSP is deinitialized on exit, false when an exception was caught.
+            if (!isDspInitialized.load(std::memory_order_acquire)) {
+                CV_LOG_DEBUG(NULL, "FastCV DSP already deinitialized, skipping deinitialization");
+                return true;
+            }
+
+            CV_LOG_INFO(NULL, "Deinitializing FastCV DSP");
+
+            try {
+                fcvQ6DeInit();
+                isDspInitialized.store(false, std::memory_order_release);
+                deInitializationCount++;
+                CV_LOG_DEBUG(NULL, cv::format("FastCV DSP deinitialized (init count: %llu, deinit count: %llu)",
+                    (unsigned long long)initializationCount.load(), (unsigned long long)deInitializationCount.load()));
+
+                return true;
+            }
+            catch (...) {
+                CV_LOG_ERROR(NULL, "Exception occurred during FastCV DSP deinitialization");
+                return false;
+            }
+        }
+
+        bool isInitialized() const {
+            return isDspInitialized.load(std::memory_order_acquire);
+        }
+
+        uint64_t getDspInitCount() const {
+            return initializationCount.load(std::memory_order_acquire);
+        }
+
+        uint64_t getDspDeInitCount() const {
+            return deInitializationCount.load(std::memory_order_acquire);
+        }
+
+        const cv::Mutex& getInitMutex() const {
+            return initMutex;
+        }
+
+    private:
+        FastCvDspContext() = default;
+};
+
+} // namespace dsp
 } // namespace fastcv
 } // namespace cv
 
diff --git a/modules/fastcv/src/sad_dsp.cpp b/modules/fastcv/src/sad_dsp.cpp
new file mode 100644
index 00000000000..a58c1383cf6
--- /dev/null
+++ b/modules/fastcv/src/sad_dsp.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+// Fills _dst (CV_16UC1, same size as _src) by calling fcvSumOfAbsoluteDiffs8x8u8_v2Q with the
+// 8x8 CV_8UC1 _patch as template; patch, src and dst must all be QcAllocator buffers.
+void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst)
+{
+    // Content checks come first so an empty input is reported as empty, not as an allocator mismatch.
+    CV_Assert(!_src.empty() && "src is empty");
+    CV_Assert(_src.type() == CV_8UC1 && "src type is not CV_8UC1");
+    CV_Assert(_src.step() * _src.rows() > MIN_REMOTE_BUF_SIZE && "src buffer size is too small");
+    CV_Assert(!_patch.empty() && "patch is empty");
+    CV_Assert(_patch.type() == CV_8UC1 && "patch type is not CV_8UC1");
+    CV_Assert(_patch.size() == cv::Size(8, 8) && "patch size is not 8x8");
+
+    cv::Mat patch = _patch.getMat();
+    cv::Mat src = _src.getMat();
+
+    // Check if matrices are allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(patch));
+    CV_Assert(IS_FASTCV_ALLOCATED(src));
+
+    _dst.create(src.size(), CV_16UC1);
+    cv::Mat dst = _dst.getMat();
+
+    // uintptr_t (not intptr_t): the low-bit mask is applied to an unsigned value
+    CV_Assert(((uintptr_t)src.data & 0x7) == 0 && "src data is not 8-byte aligned");
+    CV_Assert(((uintptr_t)dst.data & 0x7) == 0 && "dst data is not 8-byte aligned");
+    // Check if dst is allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(dst));
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+    fcvSumOfAbsoluteDiffs8x8u8_v2Q(patch.data, patch.step, src.data, src.cols, src.rows, src.step, dst.ptr<uint16_t>(), dst.step);
+}
+
+} // dsp::
+} // fastcv::
+} // cv::
diff --git a/modules/fastcv/src/thresh_dsp.cpp b/modules/fastcv/src/thresh_dsp.cpp
new file mode 100644
index 00000000000..9c74e619d37
--- /dev/null
+++ b/modules/fastcv/src/thresh_dsp.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+    // Otsu-thresholds the 8-bit single-channel _src into _dst with fcvFilterThresholdOtsuu8Q.
+    // `type` selects the output polarity: true -> FCV_THRESH_BINARY_INV, false -> FCV_THRESH_BINARY.
+    // All preconditions listed below are enforced with CV_Assert.
+    void thresholdOtsu(InputArray _src, OutputArray _dst, bool type)
+    {
+        // Input: non-empty, CV_8UC1, allocated by the QcAllocator.
+        CV_Assert(!_src.empty() && _src.type() == CV_8UC1 && IS_FASTCV_ALLOCATED(_src.getMat()));
+
+        // Input buffer size and multiple-of-8 width/stride requirements.
+        CV_Assert((_src.step() * _src.rows()) > MIN_REMOTE_BUF_SIZE);
+        CV_Assert(_src.cols() % 8 == 0);
+        CV_Assert(_src.step() % 8 == 0);
+
+        Mat src = _src.getMat();
+        CV_Assert(((uintptr_t)src.data & 0x7) == 0);
+
+        // Output takes the input geometry and has to meet the same multiple-of-8 rules.
+        _dst.create(_src.size(), CV_8UC1);
+        CV_Assert(_dst.step() % 8 == 0);
+        CV_Assert(_dst.cols() % 8 == 0);
+        Mat dst = _dst.getMat();
+
+        // Check if dst is allocated by the QcAllocator, with an 8-byte aligned data pointer
+        CV_Assert(IS_FASTCV_ALLOCATED(dst));
+        CV_Assert(((uintptr_t)dst.data & 0x7) == 0);
+
+        // In-place call: both headers point at the same data, so their strides have to agree.
+        const bool sharesBuffer = (src.data == dst.data);
+        if (sharesBuffer) {
+            CV_Assert(src.step == dst.step);
+        }
+
+        // Check DSP initialization status and initialize if needed
+        FASTCV_CHECK_DSP_INIT();
+
+        // Map the boolean flag onto the FastCV threshold mode.
+        const fcvThreshType polarity = type ? FCV_THRESH_BINARY_INV : FCV_THRESH_BINARY;
+
+        fcvFilterThresholdOtsuu8Q(src.data, src.cols, src.rows, src.step, dst.data, dst.step, polarity);
+    }
+
+} // dsp::
+} // fastcv::
+} // cv::
\ No newline at end of file
diff --git a/modules/fastcv/test/test_blur_dsp.cpp b/modules/fastcv/test/test_blur_dsp.cpp
new file mode 100644
index 00000000000..2be6dd3475a
--- /dev/null
+++ b/modules/fastcv/test/test_blur_dsp.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+typedef testing::TestWithParam<tuple<Size, int, int>> Filter2DTest_DSP;
+
+// Runs cv::fastcv::dsp::filter2D on random 8-bit data held in QcAllocator buffers and checks
+// that, once both results are converted to 8 bits, fewer than (rows + cols) * ksize pixels
+// differ from cv::filter2D.
+TEST_P(Filter2DTest_DSP, accuracy)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before any dsp:: call.
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const Size imageSize  = get<0>(GetParam());
+    const int  dstDepth   = get<1>(GetParam());
+    const int  kernelSide = get<2>(GetParam());
+
+    cv::Mat src;
+    src.allocator = cv::fastcv::getQcAllocator();
+    src.create(imageSize, CV_8U);
+
+    cv::Mat kernel;
+    cv::Mat dst, ref;
+    kernel.allocator = cv::fastcv::getQcAllocator();
+    dst.allocator = cv::fastcv::getQcAllocator();
+
+    // 8-bit and 16-bit outputs use a signed 8-bit kernel, float output uses a float kernel;
+    // for any other depth the test body returns without doing anything further.
+    if (dstDepth == CV_8U || dstDepth == CV_16S) {
+        kernel.create(kernelSide, kernelSide, CV_8S);
+    } else if (dstDepth == CV_32F) {
+        kernel.create(kernelSide, kernelSide, CV_32F);
+    } else {
+        return;
+    }
+
+    // Same generator for both fills: image first, then kernel.
+    RNG& rng = cv::theRNG();
+    cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(255));
+    cvtest::randUni(rng, kernel, Scalar::all(INT8_MIN), Scalar::all(INT8_MAX));
+
+    // Result under test
+    cv::fastcv::dsp::filter2D(src, dst, dstDepth, kernel);
+
+    // De-initialize the DSP; the reference below is computed by cv::filter2D.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    cv::filter2D(src, ref, dstDepth, kernel);
+
+    // Compare both results at 8-bit depth.
+    dst.convertTo(dst, CV_8U);
+    ref.convertTo(ref, CV_8U);
+    cv::Mat mismatch;
+    cv::absdiff(dst, ref, mismatch);
+
+    const int mismatchCount = cv::countNonZero(mismatch);
+    EXPECT_LT(mismatchCount, (src.rows+src.cols)*kernelSide);
+}
+
+INSTANTIATE_TEST_CASE_P(FastCV_Extension, Filter2DTest_DSP, Combine(
+/*image size*/      Values(perf::szVGA, perf::sz720p),
+/*dst depth*/      Values(CV_8U,CV_16S,CV_32F),
+/*kernel size*/    Values(3, 5, 7, 9, 11)
+));
+
+}} // namespaces opencv_test, ::
diff --git a/modules/fastcv/test/test_edges_dsp.cpp b/modules/fastcv/test/test_edges_dsp.cpp
new file mode 100644
index 00000000000..7bf41d3ba7f
--- /dev/null
+++ b/modules/fastcv/test/test_edges_dsp.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+// Smoke test for cv::fastcv::dsp::Canny: the output must be non-empty and as large as the input.
+TEST(DSP_CannyTest, accuracy)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before the dsp:: call.
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    cv::Mat image, edges;
+    image.allocator = cv::fastcv::getQcAllocator();
+    edges.allocator = cv::fastcv::getQcAllocator();
+
+    const std::string imagePath = cvtest::findDataFile("cv/detectors_descriptors_evaluation/planar/box_in_scene.png");
+    cv::imread(imagePath, image, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty()) << "Could not read the image file.";
+
+    const int lowThreshold = 0;
+    const int highThreshold = 150;
+    cv::fastcv::dsp::Canny(image, edges, lowThreshold, highThreshold, 3, true);
+
+    // De-initialize the DSP before inspecting the result.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    EXPECT_FALSE(edges.empty());
+    EXPECT_EQ(image.size(), edges.size());
+}
+
+}
+}
diff --git a/modules/fastcv/test/test_fft_dsp.cpp b/modules/fastcv/test/test_fft_dsp.cpp
new file mode 100644
index 00000000000..49c20c4bfeb
--- /dev/null
+++ b/modules/fastcv/test/test_fft_dsp.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+class FFT_DSPExtTest : public ::testing::TestWithParam<cv::Size> {};
+
+// Forward transform: cv::fastcv::dsp::FFT of random 8-bit data is compared with cv::dft
+// (complex output) of the same data converted to float, using the Inf norm and a scaled L2 norm.
+TEST_P(FFT_DSPExtTest, forward)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before the FFT call.
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const Size imageSize = GetParam();
+    // Random 8-bit input in a QcAllocator buffer.
+    Mat input;
+    input.allocator = cv::fastcv::getQcAllocator();
+    input.create(imageSize, CV_8UC1);
+    cvtest::randUni(cv::theRNG(), input, Scalar::all(0), Scalar::all(256));
+
+    Mat inputFloat;
+    input.convertTo(inputFloat, CV_32F);
+
+    Mat spectrum;
+    spectrum.allocator = cv::fastcv::getQcAllocator();
+    cv::fastcv::dsp::FFT(input, spectrum);
+
+    // De-initialize the DSP; the reference is computed by cv::dft.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    Mat reference;
+    cv::dft(inputFloat, reference, DFT_COMPLEX_OUTPUT);
+
+    const double normInf = cvtest::norm(spectrum, reference, cv::NORM_INF);
+    const double normL2  = cvtest::norm(spectrum, reference, cv::NORM_L2)  / spectrum.size().area();
+
+    EXPECT_LT(normInf, 19.1); // for 512x512 case
+    EXPECT_LT(normL2, 18.0 / 256.0 );
+}
+
+// Round trip: FFT followed by IFFT on the DSP is compared with cv::dft followed by cv::idft,
+// where the OpenCV result is scaled by 1/area before the norms are taken.
+TEST_P(FFT_DSPExtTest, inverse)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before the FFT/IFFT calls.
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const Size imageSize = GetParam();
+
+    // Random 8-bit input in a QcAllocator buffer.
+    Mat input;
+    input.allocator = cv::fastcv::getQcAllocator();
+    input.create(imageSize, CV_8UC1);
+    cvtest::randUni(cv::theRNG(), input, Scalar::all(0), Scalar::all(256));
+
+    Mat inputFloat;
+    input.convertTo(inputFloat, CV_32F);
+
+    // DSP path: forward, then inverse, both into QcAllocator buffers.
+    Mat spectrum, restored;
+    spectrum.allocator = cv::fastcv::getQcAllocator();
+    restored.allocator = cv::fastcv::getQcAllocator();
+    cv::fastcv::dsp::FFT(input, spectrum);
+    cv::fastcv::dsp::IFFT(spectrum, restored);
+
+    // De-initialize the DSP; everything below uses plain OpenCV calls.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    Mat restoredFloat;
+    restored.convertTo(restoredFloat, CV_32F);
+
+    // Reference path: cv::dft then cv::idft, rescaled by the pixel count.
+    Mat spectrumRef, restoredRef;
+    cv::dft(inputFloat, spectrumRef, DFT_COMPLEX_OUTPUT);
+    cv::idft(spectrumRef, restoredRef, DFT_REAL_OUTPUT);
+    restoredRef *= 1./(input.size().area());
+
+    const double normInf = cvtest::norm(restoredFloat, restoredRef, cv::NORM_INF);
+    const double normL2  = cvtest::norm(restoredFloat, restoredRef, cv::NORM_L2)  / input.size().area();
+
+    EXPECT_LT(normInf, 9.16e-05);
+    EXPECT_LT(normL2,  1.228e-06);
+}
+
+INSTANTIATE_TEST_CASE_P(FastCV_Extension, FFT_DSPExtTest, ::testing::Values(Size(256, 256), Size(512, 512)));
+
+}} // namespaces opencv_test, ::
diff --git a/modules/fastcv/test/test_main.cpp b/modules/fastcv/test/test_main.cpp
index cc60576e96f..fe8a3c6c515 100644
--- a/modules/fastcv/test/test_main.cpp
+++ b/modules/fastcv/test/test_main.cpp
@@ -5,4 +5,9 @@
 
 #include "test_precomp.hpp"
 
-CV_TEST_MAIN("")
+// Registers the FastCV DSP tag as a global skip tag; invoked through CV_TEST_MAIN below.
+static void initFastCVTests() {
+    cvtest::registerGlobalSkipTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+}
+
+CV_TEST_MAIN("", initFastCVTests())
diff --git a/modules/fastcv/test/test_precomp.hpp b/modules/fastcv/test/test_precomp.hpp
index 7ff8ed78049..5c172e71c54 100644
--- a/modules/fastcv/test/test_precomp.hpp
+++ b/modules/fastcv/test/test_precomp.hpp
@@ -9,3 +9,5 @@
 #include <opencv2/video.hpp>
 
 #include <opencv2/fastcv.hpp>
+
+#define CV_TEST_TAG_FASTCV_SKIP_DSP "fastcv_skip_dsp"
diff --git a/modules/fastcv/test/test_sad_dsp.cpp b/modules/fastcv/test/test_sad_dsp.cpp
new file mode 100644
index 00000000000..5c160e75028
--- /dev/null
+++ b/modules/fastcv/test/test_sad_dsp.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+using namespace cv::fastcv::dsp;
+
+namespace opencv_test { namespace {
+
+// Smoke test for cv::fastcv::dsp::sumOfAbsoluteDiffs: an all-zero 8x8 patch matched against an
+// all-255 512x512 image must produce a non-empty result.
+TEST(SadTest, accuracy)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before the dsp:: call.
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    // 8x8 template patch, all zeros
+    cv::Mat templ;
+    templ.allocator = cv::fastcv::getQcAllocator();
+    templ.create(8, 8, CV_8UC1);
+    templ.setTo(cv::Scalar(0));
+
+    // 512x512 source image, all 255
+    cv::Mat image;
+    image.allocator = cv::fastcv::getQcAllocator();
+    image.create(512, 512, CV_8UC1);
+    image.setTo(cv::Scalar(255));
+
+    cv::Mat sad;
+    sad.allocator = cv::fastcv::getQcAllocator();
+    cv::fastcv::dsp::sumOfAbsoluteDiffs(templ, image, sad);
+
+    EXPECT_FALSE(sad.empty());
+    // De-initialize the DSP again
+    cv::fastcv::dsp::fcvdspdeinit();
+}
+
+}
+}
diff --git a/modules/fastcv/test/test_thresh_dsp.cpp b/modules/fastcv/test/test_thresh_dsp.cpp
new file mode 100644
index 00000000000..a475d928761
--- /dev/null
+++ b/modules/fastcv/test/test_thresh_dsp.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+// Thresholds a grayscale test image with cv::fastcv::dsp::thresholdOtsu and compares the result
+// with cv::threshold(THRESH_BINARY | THRESH_OTSU); no pixel may differ by more than 10.
+TEST(ThresholdOtsuTest, accuracy)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before the dsp:: call.
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    cv::Mat image;
+    image.allocator = cv::fastcv::getQcAllocator();
+    const std::string imagePath = cvtest::findDataFile("cv/detectors_descriptors_evaluation/planar/box_in_scene.png");
+    cv::imread(imagePath, image, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty()) << "Could not read the image file.";
+
+    cv::Mat binary;
+    binary.allocator = cv::fastcv::getQcAllocator();
+
+    const bool invert = false;
+    cv::fastcv::dsp::thresholdOtsu(image, binary, invert);
+
+    // De-initialize the DSP; the reference is computed by cv::threshold.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    EXPECT_FALSE(binary.empty());
+    EXPECT_EQ(image.size(), binary.size());
+
+    // Reference: cv::threshold with Otsu's method on the same image.
+    cv::Mat expected;
+    cv::threshold(image, expected, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);
+    cv::Mat absDelta;
+    cv::absdiff(binary, expected, absDelta);
+    double largestDelta;
+    cv::minMaxLoc(absDelta, nullptr, &largestDelta);
+    const double maxDifference = 10.0;
+    EXPECT_LE(largestDelta, maxDifference) << "The custom threshold result differs from the reference result by more than the acceptable threshold.";
+}
+
+// In-place variant: the image is copied into a QcAllocator buffer that is passed to
+// cv::fastcv::dsp::thresholdOtsu as both input and output, then compared with cv::threshold
+// (THRESH_BINARY | THRESH_OTSU) applied to the untouched original.
+TEST(ThresholdOtsuTest, inPlaceAccuracy)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before the dsp:: call.
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    cv::Mat image;
+    image.allocator = cv::fastcv::getQcAllocator();
+    const std::string imagePath = cvtest::findDataFile("cv/detectors_descriptors_evaluation/planar/box_in_scene.png");
+    cv::imread(imagePath, image, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty()) << "Could not read the image file.";
+
+    // Working copy that serves as source and destination at once.
+    cv::Mat inPlace;
+    inPlace.allocator = cv::fastcv::getQcAllocator();
+    image.copyTo(inPlace);
+
+    const bool invert = false;
+    cv::fastcv::dsp::thresholdOtsu(inPlace, inPlace, invert);
+
+    // De-initialize the DSP; the reference is computed by cv::threshold.
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    EXPECT_FALSE(inPlace.empty());
+    EXPECT_EQ(image.size(), inPlace.size());
+
+    // Reference: cv::threshold with Otsu's method on the original image.
+    cv::Mat expected;
+    cv::threshold(image, expected, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);
+
+    cv::Mat absDelta;
+    cv::absdiff(inPlace, expected, absDelta);
+    double largestDelta;
+    cv::minMaxLoc(absDelta, nullptr, &largestDelta);
+    const double maxDifference = 10.0;
+    EXPECT_LE(largestDelta, maxDifference) << "The in-place threshold result differs from the reference result by more than the acceptable threshold.";
+}
+
+}} // namespaces opencv_test, ::

From 1e4d4e0f3e4c7d6d7ab9d738026fe13d3cd85cf4 Mon Sep 17 00:00:00 2001
From: Aakash Preetam <quic_apreetam@quicinc.com>
Date: Mon, 9 Jun 2025 18:17:31 +0530
Subject: [PATCH 4/4] Merge pull request #3936 from CodeLinaro:apreetam_6thPost

Add warpAffine and resizeDown APIs in FastCV Extension #3936

- Added warpAffine function to apply affine transformations:
  - 2x3 affine transformations for both CV_8UC1 and CV_8UC3 input;
  - 2x2 matrix-based patch extraction for grayscale images, with ROI support.

- Deprecated resizeDownBy2 and resizeDownBy4 functions.
- Introduced resizeDown function to down-scale images using specified scaling factors or dimensions, supporting both single-channel (CV_8UC1) and two-channel (CV_8UC2) images.

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV
- [ ] The PR is proposed to the proper branch
- [ ] There is a reference to the original bug report and related work
- [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable
      Patch to opencv_extra has the same branch name.
- [ ] The feature is well documented and sample code can be built with the project CMake
---
 .../fastcv/include/opencv2/fastcv/scale.hpp   |  28 ++--
 .../fastcv/include/opencv2/fastcv/warp.hpp    |  40 +++++
 modules/fastcv/perf/perf_scale.cpp            |  66 ++++++++
 modules/fastcv/perf/perf_warp.cpp             | 140 +++++++++++++++++
 modules/fastcv/src/scale.cpp                  |  83 ++++++-----
 modules/fastcv/src/warp.cpp                   | 141 ++++++++++++++++++
 modules/fastcv/test/test_scale.cpp            |  71 ++++-----
 modules/fastcv/test/test_warp.cpp             | 100 +++++++++++++
 8 files changed, 575 insertions(+), 94 deletions(-)
 create mode 100644 modules/fastcv/perf/perf_scale.cpp

diff --git a/modules/fastcv/include/opencv2/fastcv/scale.hpp b/modules/fastcv/include/opencv2/fastcv/scale.hpp
index 276b2304050..7e18ce81edd 100644
--- a/modules/fastcv/include/opencv2/fastcv/scale.hpp
+++ b/modules/fastcv/include/opencv2/fastcv/scale.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -15,20 +15,18 @@ namespace fastcv {
 //! @{
 
 /**
- * @brief Down-scale the image by averaging each 2x2 pixel block.
- * 		  This function is not bit-exact with cv::resize but provides faster execution time on Qualcomm's processor.
- * @param _src The first input image data, type CV_8UC1, src height must be a multiple of 2
- * @param _dst The output image data, type CV_8UC1
-*/
-CV_EXPORTS_W void resizeDownBy2(cv::InputArray _src, cv::OutputArray _dst);
-
-/**
- * @brief Down-scale the image by averaging each 4x4 pixel block.
- * 		  This function is not bit-exact with cv::resize but provides faster execution time on Qualcomm's processor.
- * @param _src The first input image data, type CV_8UC1, src height must be a multiple of 4
- * @param _dst The output image data, type CV_8UC1
-*/
-CV_EXPORTS_W void resizeDownBy4(cv::InputArray _src, cv::OutputArray _dst);
+ * @brief Down-scales the image using specified scaling factors or dimensions.
+ *        This function supports both single-channel (CV_8UC1) and two-channel (CV_8UC2) images.
+ * 
+ * @param _src The input image data, type CV_8UC1 or CV_8UC2.
+ * @param _dst The output image data, type CV_8UC1 or CV_8UC2.
+ * @param dsize The desired size of the output image. If empty, it is calculated using inv_scale_x and inv_scale_y.
+ * @param inv_scale_x Scale factor for the width (dst width / src width); the output may not be wider than the input. Ignored if dsize is provided.
+ * @param inv_scale_y Scale factor for the height (dst height / src height); the output may not be taller than the input. Ignored if dsize is provided.
+ * 
+ * @note If dsize is not specified, inv_scale_x and inv_scale_y must be strictly positive.
+ */
+CV_EXPORTS_W void resizeDown(cv::InputArray _src, cv::OutputArray _dst, Size dsize, double inv_scale_x, double inv_scale_y);
 
 //! @}
 
diff --git a/modules/fastcv/include/opencv2/fastcv/warp.hpp b/modules/fastcv/include/opencv2/fastcv/warp.hpp
index 2c62b0cb313..dae1a72bcc3 100644
--- a/modules/fastcv/include/opencv2/fastcv/warp.hpp
+++ b/modules/fastcv/include/opencv2/fastcv/warp.hpp
@@ -44,6 +44,46 @@ CV_EXPORTS_W void warpPerspective(InputArray _src, OutputArray _dst, InputArray
 CV_EXPORTS_W void warpPerspective2Plane(InputArray _src1, InputArray _src2, OutputArray _dst1, OutputArray _dst2,
     InputArray _M0, Size dsize);
 
+/**
+ * @brief Performs an affine transformation on an input image using a provided transformation matrix.
+ * 
+ * This function performs two types of operations based on the transformation matrix:
+ * 
+ * 1. Standard Affine Transformation (2x3 matrix):
+ *    - Transforms the entire input image using the affine matrix
+ *    - Supports both CV_8UC1 and CV_8UC3 types
+ * 
+ * 2. Patch Extraction with Transformation (2x2 matrix):
+ *    - Extracts and transforms a patch from the input image
+ *    - Only supports CV_8UC1 type
+ *    - If input is a ROI: patch is extracted from ROI center in the original image
+ *    - If input is full image: patch is extracted from image center
+ * 
+ * @param _src              Input image. Supported formats:
+ *                          - CV_8UC1: 8-bit single-channel
+ *                          - CV_8UC3: 8-bit three-channel - only for 2x3 matrix 
+ * @param _dst              Output image. Will have the same type as src and size specified by dsize
+ * @param _M                2x2/2x3 affine transformation matrix (inverse map)
+ * @param dsize             Output size:
+ *                          - For 2x3 matrix: Size of the output image
+ *                          - For 2x2 matrix: Size of the extracted patch
+ * @param interpolation     Interpolation method. Only applicable for 2x3 transformation with CV_8UC1 input.
+ *                          Options:
+ *                          - INTER_NEAREST: Nearest-neighbor interpolation
+ *                          - INTER_LINEAR: Bilinear interpolation (default)
+ *                          - INTER_AREA: Area-based interpolation
+ *                          - INTER_CUBIC: Bicubic interpolation
+ *                          Note: CV_8UC3 input always uses bicubic interpolation internally
+ * @param borderValue       Constant pixel value for border pixels. Only applicable for 2x3 transformations 
+ *                          with single-channel input.
+ *
+ * @note                    The affine matrix follows the inverse mapping convention, applied to destination coordinates
+ *                          to produce corresponding source coordinates.
+ * @note                    The function uses 'FASTCV_BORDER_CONSTANT' for border handling, with the specified 'borderValue'.
+*/
+CV_EXPORTS_W void warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int interpolation = INTER_LINEAR, 
+                            int borderValue = 0);
+
 //! @}
 
 }
diff --git a/modules/fastcv/perf/perf_scale.cpp b/modules/fastcv/perf/perf_scale.cpp
new file mode 100644
index 00000000000..e9975d51a96
--- /dev/null
+++ b/modules/fastcv/perf/perf_scale.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef perf::TestBaseWithParam<std::tuple<Size, int>> ResizePerfTest;
+
+// Times cv::fastcv::resizeDown with an explicit destination size of (cols/factor, rows/factor).
+PERF_TEST_P(ResizePerfTest, run, ::testing::Combine(
+    ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // image size
+    ::testing::Values(2, 4) // resize factor
+))
+{
+    const Size srcSize = std::get<0>(GetParam());
+    const int divisor = std::get<1>(GetParam());
+
+    cv::Mat source(srcSize, CV_8UC1);
+    cv::randu(source, cv::Scalar::all(0), cv::Scalar::all(255));
+
+    const Size targetSize(source.cols / divisor, source.rows / divisor);
+    cv::Mat shrunk;
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::resizeDown(source, shrunk, targetSize, 0, 0);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+typedef perf::TestBaseWithParam<std::tuple<Size, double, double, int>> ResizeByMnPerfTest;
+
+// Times cv::fastcv::resizeDown when dsize is left empty and the two scale factors are given.
+PERF_TEST_P(ResizeByMnPerfTest, run, ::testing::Combine(
+    ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // image size
+    ::testing::Values(0.35, 0.65), // inv_scale_x
+    ::testing::Values(0.35, 0.65), // inv_scale_y
+    ::testing::Values(CV_8UC1, CV_8UC2) // data type
+))
+{
+    const Size srcSize = std::get<0>(GetParam());
+    const double scaleX = std::get<1>(GetParam());
+    const double scaleY = std::get<2>(GetParam());
+    const int matType = std::get<3>(GetParam());
+
+    cv::Mat source(srcSize, matType);
+    cv::randu(source, cv::Scalar::all(0), cv::Scalar::all(255));
+
+    Size emptySize; // default-constructed: the output size comes from the scale factors
+    cv::Mat shrunk;
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::resizeDown(source, shrunk, emptySize, scaleX, scaleY);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
\ No newline at end of file
diff --git a/modules/fastcv/perf/perf_warp.cpp b/modules/fastcv/perf/perf_warp.cpp
index a2ec2b65cee..008c1e100a7 100644
--- a/modules/fastcv/perf/perf_warp.cpp
+++ b/modules/fastcv/perf/perf_warp.cpp
@@ -39,6 +39,29 @@ static void getInvertMatrix(Mat& src, Size dstSize, Mat& M)
     invert(M,M);
 }
 
+// Inverts a 2x3 affine matrix [A | t]: returns a CV_32F 2x3 matrix holding
+// [inv(A) | -inv(A) * t], where A is the left 2x2 block and t the right 2x1 column.
+static cv::Mat getInverseAffine(const cv::Mat& affine)
+{
+    const cv::Rect linearRegion(0, 0, 2, 2);      // left 2x2 block A
+    const cv::Rect translationRegion(2, 0, 1, 2); // right 2x1 column t
+
+    // inv(A)
+    cv::Mat linearInv;
+    cv::invert(affine(linearRegion), linearInv);
+
+    // -inv(A) * t
+    cv::Mat shift = affine(translationRegion);
+    cv::Mat translationInv = -linearInv * shift;
+
+    // Assemble both parts in a fresh single-precision 2x3 matrix
+    cv::Mat result = cv::Mat::zeros(2, 3, CV_32F);
+    linearInv.copyTo(result(linearRegion));
+    translationInv.copyTo(result(translationRegion));
+
+    return result;
+}
+
 typedef perf::TestBaseWithParam<Size> WarpPerspective2PlanePerfTest;
 
 PERF_TEST_P(WarpPerspective2PlanePerfTest, run,
@@ -93,4 +116,121 @@ PERF_TEST_P(WarpPerspectivePerfTest, run,
     SANITY_CHECK_NOTHING();
 }
 
+typedef TestBaseWithParam< tuple<MatType, Size> > WarpAffine3ChannelPerf;
+
+// Times cv::fastcv::warpAffine (2x3 matrix, default interpolation and border value) on a
+// 512x512 gradient image, writing into an output of the parameterized size.
+// The matrix passed in is the inverse of a 30-degree, 2.2x rotation about (100, 100).
+PERF_TEST_P(WarpAffine3ChannelPerf, run, Combine(
+            Values(CV_8UC3),
+            Values( szVGA, sz720p, sz1080p)
+))
+{
+    const int dataType = get<0>(GetParam());
+    const Size sz      = get<1>(GetParam());
+    const Size szSrc(512, 512);
+
+    // The source is fixed at 512x512; only the destination size varies with the parameter.
+    cv::Mat src(szSrc, dataType), dst(sz, dataType);
+
+    cvtest::fillGradient(src);
+
+    //Affine matrix
+    const float angle = 30.0f; // Rotation angle in degrees
+    const float scale = 2.2f;  // Scale factor
+    cv::Mat affine = cv::getRotationMatrix2D(cv::Point2f(100, 100), angle, scale);
+
+    // Compute the inverse affine matrix
+    cv::Mat inverseAffine = getInverseAffine(affine);
+
+    declare.in(src).out(dst);
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::warpAffine(src, dst, inverseAffine, sz);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+typedef perf::TestBaseWithParam<std::tuple<cv::Size, cv::Point2f, cv::Mat>> WarpAffineROIPerfTest;
+
+// Times cv::fastcv::warpAffine with a 2x2 matrix (patch extraction) on a ROI of baboon.png.
+PERF_TEST_P(WarpAffineROIPerfTest, run, ::testing::Combine(
+    ::testing::Values(cv::Size(50, 50), cv::Size(100, 100)), // patch size
+    ::testing::Values(cv::Point2f(50.0f, 50.0f), cv::Point2f(100.0f, 100.0f)), // position
+    ::testing::Values((cv::Mat_<float>(2, 2) << 1, 0, 0, 1), // identity matrix
+                      (cv::Mat_<float>(2, 2) << cos(CV_PI), -sin(CV_PI), sin(CV_PI), cos(CV_PI))) // rotation matrix
+))
+{
+    const cv::Size patchSize = std::get<0>(GetParam());
+    const cv::Point2f position = std::get<1>(GetParam());
+    const cv::Mat affine = std::get<2>(GetParam());
+
+    cv::Mat src = cv::imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    // Stop here if the image could not be loaded instead of timing a call on an empty ROI.
+    ASSERT_FALSE(src.empty());
+
+    // ROI with its top-left corner at `position`, clipped to the image bounds
+    cv::Rect roiRect(static_cast<int>(position.x), static_cast<int>(position.y), patchSize.width, patchSize.height);
+    roiRect = roiRect & cv::Rect(0, 0, src.cols, src.rows);
+    cv::Mat roi = src(roiRect);
+
+    cv::Mat patch;
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::warpAffine(roi, patch, affine, patchSize);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+typedef TestBaseWithParam<tuple<int, int> > WarpAffinePerfTest;
+
+// Times single-channel cv::fastcv::warpAffine (2x3 matrix) on baboon.png for each combination
+// of interpolation mode and constant border value.
+PERF_TEST_P(WarpAffinePerfTest, run, ::testing::Combine(
+    ::testing::Values(cv::InterpolationFlags::INTER_NEAREST, cv::InterpolationFlags::INTER_LINEAR, cv::InterpolationFlags::INTER_AREA),
+    ::testing::Values(0, 255) // Black and white borders
+))
+{
+    const int interpolation = std::get<0>(GetParam());
+    const int borderValue = std::get<1>(GetParam());
+
+    // Load the source image
+    cv::Mat src = cv::imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty());
+
+    // Draw the transform from OpenCV's RNG instead of std::rand() seeded with std::time(0),
+    // so the timed workload is not tied to the wall-clock time at which the test starts.
+    cv::RNG& rng = cv::theRNG();
+    const float angle = static_cast<float>(rng.uniform(0, 360));                 // degrees, 0..359
+    const float scale = static_cast<float>(rng.uniform(0, 200)) / 100.0f + 0.5f; // 0.50..2.49
+    const float tx = static_cast<float>(rng.uniform(-50, 50));                   // -50..49
+    const float ty = static_cast<float>(rng.uniform(-50, 50));                   // -50..49
+    const float radians = static_cast<float>(angle * CV_PI / 180.0);
+    cv::Mat affine = (cv::Mat_<float>(2, 3) << scale * cos(radians), -scale * sin(radians), tx,
+                                               scale * sin(radians),  scale * cos(radians), ty);
+
+    // Compute the inverse affine matrix
+    cv::Mat inverseAffine = getInverseAffine(affine);
+
+    // Destination has the same size as the source
+    const cv::Size dsize(src.cols, src.rows);
+    cv::Mat dst;
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::warpAffine(src, dst, inverseAffine, dsize, interpolation, borderValue);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
 } //namespace
\ No newline at end of file
diff --git a/modules/fastcv/src/scale.cpp b/modules/fastcv/src/scale.cpp
index 3e1a3a74b8a..0e37e96213f 100644
--- a/modules/fastcv/src/scale.cpp
+++ b/modules/fastcv/src/scale.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -8,55 +8,68 @@
 namespace cv {
 namespace fastcv {
 
-void resizeDownBy2(cv::InputArray _src, cv::OutputArray _dst)
+void resizeDown(cv::InputArray _src, cv::OutputArray _dst, Size dsize, double inv_scale_x, double inv_scale_y)
 {
-    INITIALIZATION_CHECK;
-
-    CV_Assert(!_src.empty() && _src.type() == CV_8UC1);
-
-    Mat src = _src.getMat();
-    CV_Assert((src.cols & 1)==0 && (src.rows & 1)==0);
-
-    int type = _src.type();
-    cv::Size dsize(src.cols / 2, src.rows / 2);
-
-    _dst.create(dsize, type);
-
-    Mat dst = _dst.getMat();
+    fcvStatus status = FASTCV_SUCCESS; // overwritten by whichever FastCV scaler runs below
+    Size ssize = _src.size();
 
-    fcvStatus status = (fcvStatus)fcvScaleDownBy2u8_v2((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data,
-        src.cols/2);
+    CV_Assert(!_src.empty() );
+    CV_Assert( _src.type() == CV_8UC1 || _src.type() == CV_8UC2 );
 
-    if (status != FASTCV_SUCCESS)
+    if( dsize.empty() ) // no explicit size: derive it from the scale factors
     {
-        std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
-        CV_Error( cv::Error::StsInternal, "FastCV error: " + s);
+        CV_Assert(inv_scale_x > 0);
+        CV_Assert(inv_scale_y > 0);
+        dsize = Size(saturate_cast<int>(ssize.width*inv_scale_x),
+                     saturate_cast<int>(ssize.height*inv_scale_y));
+        CV_Assert( !dsize.empty() );
+    }
+    else // explicit size wins: the passed scale factors are replaced by the dsize/ssize ratios
+    {
+        inv_scale_x = static_cast<double>(dsize.width) / ssize.width;
+        inv_scale_y = static_cast<double>(dsize.height) / ssize.height;
+        CV_Assert(inv_scale_x > 0);
+        CV_Assert(inv_scale_y > 0);
     }
-}
-
-void resizeDownBy4(cv::InputArray _src, cv::OutputArray _dst)
-{
-    INITIALIZATION_CHECK;
-
-    CV_Assert(!_src.empty() && _src.type() == CV_8UC1);
 
-    Mat src = _src.getMat();
-    CV_Assert((src.cols & 3)==0 && (src.rows & 3)==0);
+    CV_Assert(dsize.width <= ssize.width && dsize.height <= ssize.height); // downscaling only
 
-    int type = _src.type();
-    cv::Size dsize(src.cols / 4, src.rows / 4);
+    CV_Assert(dsize.width * 20 > ssize.width); // reduction must stay below 20x horizontally
+    CV_Assert(dsize.height * 20 > ssize.height); // ... and vertically
 
-    _dst.create(dsize, type);
+    INITIALIZATION_CHECK;
 
+    Mat src = _src.getMat();
+    _dst.create(dsize, src.type());
     Mat dst = _dst.getMat();
 
-    fcvStatus status = (fcvStatus)fcvScaleDownBy4u8_v2((const uint8_t*)src.data, src.cols, src.rows, src.step,
-        (uint8_t*)dst.data, src.cols/4);
+    // Both buffers must start on a 16-byte boundary before they are handed to FastCV
+    CV_Assert(reinterpret_cast<uintptr_t>(src.data) % 16 == 0);
+    CV_Assert(reinterpret_cast<uintptr_t>(dst.data) % 16 == 0);
+
+    if(src.type() == CV_8UC2) // two interleaved channels: generic M/N interleaved scaler
+    {
+        status = (fcvStatus)fcvScaleDownMNInterleaveu8((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data, dst.cols, dst.rows, dst.step); // keep the status so a failure reaches the check below
+    }
+    else if (src.cols/dst.cols == 4 && src.rows/dst.rows == 4 && src.cols % dst.cols == 0 && src.rows % dst.rows == 0) // exact 4x reduction
+    {
+        CV_Assert(src.rows % 4 == 0);
+        status = (fcvStatus)fcvScaleDownBy4u8_v2((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data, dst.step);
+    }
+    else if (src.cols/dst.cols == 2 && src.rows/dst.rows == 2 && src.cols % dst.cols == 0 && src.rows % dst.rows == 0) // exact 2x reduction
+    {
+        CV_Assert(src.rows % 2 == 0);
+        status = (fcvStatus)fcvScaleDownBy2u8_v2((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data, dst.step);
+    }
+    else // any other ratio: generic M/N scaler
+    {
+        status = (fcvStatus)fcvScaleDownMNu8((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data, dst.cols, dst.rows, dst.step); // keep the status so a failure reaches the check below
+    }
 
     if (status != FASTCV_SUCCESS)
     {
         std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
-        CV_Error( cv::Error::StsInternal, "FastCV error: " + s);
+        CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
     }
 }
 
diff --git a/modules/fastcv/src/warp.cpp b/modules/fastcv/src/warp.cpp
index ac806ffc4ae..28e312e26f9 100644
--- a/modules/fastcv/src/warp.cpp
+++ b/modules/fastcv/src/warp.cpp
@@ -175,5 +175,146 @@ void warpPerspective(InputArray _src, OutputArray _dst, InputArray _M0, Size dsi
         FcvWarpPerspectiveLoop_Invoker(src, tmp, dst, tmp, matrix, fcvInterpolation, fcvBorder, fcvBorderValue), nStripes);
 }
 
+// Affine warp through FastCV: a 2x2 M extracts a patch (CV_8UC1 only), a 2x3 M warps a CV_8UC1 or CV_8UC3 image.
+void warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int interpolation, int borderValue)
+{
+    INITIALIZATION_CHECK;
+    CV_Assert(!_src.empty());
+    CV_Assert(!_M.empty());
+
+    Mat src = _src.getMat();
+    Mat M = _M.getMat();
+
+    CV_CheckEQ(M.rows, 2, "Affine Matrix must have 2 rows");
+    CV_Check(M.cols, M.cols == 2 || M.cols == 3, "Affine Matrix must be 2x2 or 2x3");
+    M.convertTo(M, CV_32F); // coefficients are read with at<float>() below; a CV_64F matrix would otherwise be misread
+    if (M.rows == 2 && M.cols == 2)
+    {
+        CV_CheckTypeEQ(src.type(), CV_8UC1, "2x2 matrix transformation only supports CV_8UC1");
+
+        // Check if src is a ROI
+        Size wholeSize;
+        Point ofs;
+        src.locateROI(wholeSize, ofs);
+        bool isROI = (wholeSize.width > src.cols || wholeSize.height > src.rows);
+
+        Mat fullImage;
+        Point2f center;
+
+        if (isROI)
+        {
+            center.x = ofs.x + src.cols / 2.0f;
+            center.y = ofs.y + src.rows / 2.0f;
+
+            CV_Check(center.x, center.x >= 0 && center.x < wholeSize.width, "ROI center X is outside full image bounds");
+            CV_Check(center.y, center.y >= 0 && center.y < wholeSize.height, "ROI center Y is outside full image bounds");
+
+            size_t offset = ofs.y * src.step + ofs.x * src.elemSize();
+            fullImage = Mat(wholeSize, src.type(), src.data - offset, src.step); // keep the parent's row stride (it may be padded)
+        }
+        else
+        {
+            // Use src as is, center at image center
+            fullImage = src;
+            center.x = src.cols / 2.0f;
+            center.y = src.rows / 2.0f;
+
+            CV_LOG_WARNING(NULL, "2x2 matrix with non-ROI input. Using image center for patch extraction.");
+        }
+
+        float affineMatrix[4] = {
+            M.at<float>(0, 0), M.at<float>(0, 1),
+            M.at<float>(1, 0), M.at<float>(1, 1)};
+
+        float position[2] = {center.x, center.y};
+
+        // The destination's own row stride is handed to FastCV, so a preallocated non-continuous dst is filled correctly.
+        _dst.create(dsize, src.type());
+        Mat dst = _dst.getMat();
+
+        int status = fcvTransformAffineu8_v2(
+            (const uint8_t *)fullImage.data,
+            fullImage.cols, fullImage.rows, fullImage.step,
+            position,
+            affineMatrix,
+            (uint8_t *)dst.data,
+            dst.cols, dst.rows, dst.step);
+
+        if (status != 0)
+        {
+            CV_Error(Error::StsInternal, "FastCV patch extraction failed");
+        }
+
+        return;
+    }
+
+    // Validate 2x3 matrix for standard transformation
+    CV_CheckEQ(M.cols, 3, "Matrix must be 2x3 for standard affine transformation");
+    CV_Check(src.type(), src.type() == CV_8UC1 || src.type() == CV_8UC3, "Standard transformation supports CV_8UC1 or CV_8UC3");
+
+    float32_t affineMatrix[6] = {
+        M.at<float>(0, 0), M.at<float>(0, 1), M.at<float>(0, 2),
+        M.at<float>(1, 0), M.at<float>(1, 1), M.at<float>(1, 2)};
+
+    _dst.create(dsize, src.type());
+    Mat dst = _dst.getMat();
+
+    if (src.channels() == 1)
+    {
+        fcvStatus status;
+        fcvInterpolationType fcvInterpolation;
+
+        switch (interpolation)
+        {
+        case cv::InterpolationFlags::INTER_NEAREST:
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_NEAREST_NEIGHBOR;
+            break;
+        case cv::InterpolationFlags::INTER_LINEAR:
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_BILINEAR;
+            break;
+        case cv::InterpolationFlags::INTER_AREA:
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_AREA;
+            break;
+        default:
+            CV_Error(cv::Error::StsBadArg, "Unsupported interpolation type");
+        }
+
+        status = fcvTransformAffineClippedu8_v3(
+            (const uint8_t *)src.data, src.cols, src.rows, src.step,
+            affineMatrix,
+            (uint8_t *)dst.data, dst.cols, dst.rows, dst.step,
+            NULL,
+            fcvInterpolation,
+            FASTCV_BORDER_CONSTANT,
+            borderValue);
+
+        if (status != FASTCV_SUCCESS)
+        {
+            std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
+            CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
+        }
+    }
+    else if (src.channels() == 3) // interpolation and borderValue are not forwarded on this path
+    {
+        CV_LOG_INFO(NULL, "warpAffine: 3-channel images use bicubic interpolation internally.");
+
+        std::vector<uint32_t> dstBorder;
+        try
+        {
+            dstBorder.resize(dsize.height * 2);
+        }
+        catch (const std::bad_alloc &)
+        {
+            CV_Error(Error::StsNoMem, "Failed to allocate border array");
+        }
+
+        fcv3ChannelTransformAffineClippedBCu8(
+            (const uint8_t *)src.data, src.cols, src.rows, src.step[0],
+            affineMatrix,
+            (uint8_t *)dst.data, dst.cols, dst.rows, dst.step[0],
+            dstBorder.data());
+    }
+}
+
 } // fastcv::
 } // cv::
\ No newline at end of file
diff --git a/modules/fastcv/test/test_scale.cpp b/modules/fastcv/test/test_scale.cpp
index b8e84218ed8..46ffa7d32f8 100644
--- a/modules/fastcv/test/test_scale.cpp
+++ b/modules/fastcv/test/test_scale.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -7,27 +7,21 @@
 
 namespace opencv_test { namespace {
 
-class ResizeBy2Test : public ::testing::TestWithParam<cv::Size> {};
-class ResizeBy4Test : public ::testing::TestWithParam<cv::Size> {};
-
 TEST(resizeDownBy2, accuracy)
 {
     cv::Mat inputImage = cv::imread(cvtest::findDataFile("cv/shared/box_in_scene.png"), cv::IMREAD_GRAYSCALE);
 
-    Size dsize;
     cv::Mat resized_image;
 
-    cv::fastcv::resizeDownBy2(inputImage, resized_image);
+    cv::fastcv::resizeDown(inputImage, resized_image, cv::Size(inputImage.cols / 2, inputImage.rows / 2), 0, 0); // explicit half-size dsize; the zero scale factors are placeholders
 
     EXPECT_FALSE(resized_image.empty());
 
     cv::Mat resizedImageOpenCV;
     cv::resize(inputImage, resizedImageOpenCV, cv::Size(inputImage.cols / 2, inputImage.rows / 2), 0, 0, INTER_AREA);
 
-    // Calculate the maximum difference
     double maxVal = cv::norm(resized_image, resizedImageOpenCV, cv::NORM_INF);
 
-    // Assert if the difference is acceptable (max difference should be less than 10)
     CV_Assert(maxVal < 10 && "Difference between images is too high!");
 }
 
@@ -38,67 +32,56 @@ TEST(resizeDownBy4, accuracy)
     Size dsize;
     cv::Mat resized_image;
 
-    cv::fastcv::resizeDownBy4(inputImage, resized_image);
+    cv::fastcv::resizeDown(inputImage, resized_image, dsize, 0.25, 0.25);
 
     EXPECT_FALSE(resized_image.empty());
 
     cv::Mat resizedImageOpenCV;
     cv::resize(inputImage, resizedImageOpenCV, cv::Size(inputImage.cols / 4, inputImage.rows / 4), 0, 0, INTER_AREA);
 
-    // Calculate the maximum difference
     double maxVal = cv::norm(resized_image, resizedImageOpenCV, cv::NORM_INF);
 
-    // Assert if the difference is acceptable (max difference should be less than 10)
     CV_Assert(maxVal < 10 && "Difference between images is too high!");
 }
 
-TEST_P(ResizeBy2Test, ResizeBy2) {
-
-    //Size size = get<0>(GetParam());
-    Size size = GetParam();
-    cv::Mat inputImage(size, CV_8UC1);
-    randu(inputImage, Scalar::all(0), Scalar::all(255)); // Fill with random values
+TEST(resizeDownMN, accuracy)
+{
+    cv::Mat inputImage = cv::imread(cvtest::findDataFile("cv/cascadeandhog/images/class57.png"), cv::IMREAD_GRAYSCALE);
 
-    Size dsize;
     cv::Mat resized_image;
 
-    // Resize the image by a factor of 2
-    cv::fastcv::resizeDownBy2(inputImage, resized_image);
+    cv::fastcv::resizeDown(inputImage, resized_image, cv::Size(800, 640), 0, 0); // NOTE(review): assumes class57.png is at least 800x640 - confirm
 
-    // Check if the output size is correct
-    EXPECT_EQ(resized_image.size().width, size.width * 0.5);
-    EXPECT_EQ(resized_image.size().height, size.height * 0.5);
+    EXPECT_FALSE(resized_image.empty());
+
+    cv::Mat resizedImageOpenCV;
+    cv::resize(inputImage, resizedImageOpenCV, cv::Size(800, 640), 0, 0, INTER_LINEAR); // reference uses INTER_LINEAR here, not INTER_AREA as in the By2/By4 tests
+
+    double maxVal = cv::norm(resized_image, resizedImageOpenCV, cv::NORM_INF);
+
+    CV_Assert(maxVal < 78 && "Difference between images is too high!"); // NOTE(review): 78 is far looser than the 10 used by the sibling tests - confirm intended
 }
 
-TEST_P(ResizeBy4Test, ResizeBy4) {
+TEST(resizeDownInterleaved, accuracy)
+{
+    cv::Mat inputImage = cv::Mat::zeros(512, 512, CV_8UC2); // two interleaved 8-bit channels
+    cv::randu(inputImage, cv::Scalar::all(0), cv::Scalar::all(255)); // Scalar::all so BOTH channels get random data; Scalar(255) would bound only channel 0
 
-    //Size size = get<0>(GetParam());
-    Size size = GetParam();
-    cv::Mat inputImage(size, CV_8UC1);
-    randu(inputImage, Scalar::all(0), Scalar::all(255)); // Fill with random values
 
     Size dsize;
     cv::Mat resized_image;
 
-    // Resize the image by a factor of 4
-    cv::fastcv::resizeDownBy4(inputImage, resized_image);
+    cv::fastcv::resizeDown(inputImage, resized_image, dsize, 0.500, 0.125); // empty dsize: the output size comes from the scale factors
 
-    // Check if the output size is correct
-    EXPECT_EQ(resized_image.size().width, size.width * 0.25);
-    EXPECT_EQ(resized_image.size().height, size.height * 0.25);
-}
+    EXPECT_FALSE(resized_image.empty());
 
-INSTANTIATE_TEST_CASE_P(
-    ResizeTests,
-    ResizeBy2Test,
-    ::testing::Values(cv::Size(640, 480), cv::Size(1280, 720), cv::Size(1920, 1080)
-));
 
-INSTANTIATE_TEST_CASE_P(
-    ResizeTests,
-    ResizeBy4Test,
-    ::testing::Values(cv::Size(640, 480), cv::Size(1280, 720), cv::Size(1920, 1080)
-));
+    cv::Mat resizedImageOpenCV;
+    cv::resize(inputImage, resizedImageOpenCV, dsize, 0.500, 0.125, INTER_AREA); // reference result with the same scale factors
 
+    double maxVal = cv::norm(resized_image, resizedImageOpenCV, cv::NORM_INF);
+
+    CV_Assert(maxVal < 10 && "Difference between images is too high!");
+}
 
 }} // namespaces opencv_test, ::
\ No newline at end of file
diff --git a/modules/fastcv/test/test_warp.cpp b/modules/fastcv/test/test_warp.cpp
index a87902ad102..72f32bda031 100644
--- a/modules/fastcv/test/test_warp.cpp
+++ b/modules/fastcv/test/test_warp.cpp
@@ -39,6 +39,29 @@ static void getInvertMatrix(Mat& src, Size dstSize, Mat& M)
     invert(M,M);
 }
 
+static cv::Mat getInverseAffine(const cv::Mat& affine)
+{
+    // Inverts y = A*x + t, where A is the left 2x2 block and t the last column:
+    // the returned matrix is [A^-1 | -A^-1*t].
+
+    const cv::Rect linearBlock(0, 0, 2, 2);
+    const cv::Rect shiftColumn(2, 0, 1, 2);
+
+    // A^-1
+    cv::Mat invLinear;
+    cv::invert(affine(linearBlock), invLinear);
+
+    // -A^-1 * t
+    cv::Mat invShift = -invLinear * affine(shiftColumn);
+
+    // Write both parts into a fresh 2x3 CV_32F matrix
+    cv::Mat result = cv::Mat::zeros(2, 3, CV_32F);
+    invLinear.copyTo(result(linearBlock));
+    invShift.copyTo(result(shiftColumn));
+
+    return result;
+}
+
 typedef testing::TestWithParam<cv::Size> WarpPerspective2Plane;
 
 TEST_P(WarpPerspective2Plane, accuracy)
@@ -106,5 +129,82 @@ INSTANTIATE_TEST_CASE_P(FastCV_Extension, WarpPerspective,Combine(
 ));
 INSTANTIATE_TEST_CASE_P(FastCV_Extension, WarpPerspective2Plane, Values(perf::szVGA, perf::sz720p, perf::sz1080p));
 
+TEST(WarpAffine3ChannelTest, accuracy)
+{
+    cv::Mat src = imread(cvtest::findDataFile("cv/shared/baboon.png"));
+    ASSERT_FALSE(src.empty());
+
+    // 2x3 float matrix: scale factor 2 on both axes with a (-50, -50) translation
+    cv::Mat M = (cv::Mat_<float>(2, 3) << 2.0, 0, -50.0, 0, 2.0, -50.0);
+    cv::Size dsize(src.cols, src.rows);
+    cv::Mat dst;
+    cv::fastcv::warpAffine(src, dst, M, dsize);
+
+    EXPECT_FALSE(dst.empty());
+    EXPECT_EQ(dst.size(), dsize);
+    EXPECT_EQ(dst.type(), src.type());
+}
+
+TEST(WarpAffineROITest, accuracy)
+{
+    cv::Mat src = cv::imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty()); // the ROI below must not be cut from an image that failed to load
+
+    // 2x2 rotation matrix for a 180-degree turn
+    float angle = 180.0; // Rotation angle in degrees
+    float radians = angle * CV_PI / 180.0;
+    cv::Mat affine = (cv::Mat_<float>(2, 2) << cos(radians), -sin(radians), sin(radians), cos(radians));
+
+    // The source is passed as a 100x100 sub-view taken from the top-left corner of the image
+    cv::Mat patch;
+    cv::Mat roi = src(cv::Rect(0, 0, 100, 100));
+    cv::fastcv::warpAffine(roi, patch, affine, cv::Size(100, 100));
+
+    EXPECT_FALSE(patch.empty());
+    EXPECT_EQ(patch.size(), cv::Size(100, 100));
+    EXPECT_EQ(patch.type(), CV_8UC1);
+}
+
+typedef testing::TestWithParam<tuple<int, int>> WarpAffineTest;
+
+TEST_P(WarpAffineTest, accuracy)
+{
+    // Parameters: (interpolation flag, constant border value)
+    const int interpolation = std::get<0>(GetParam());
+    const int borderValue = std::get<1>(GetParam());
+
+    // Grayscale (single-channel) input image
+    cv::Mat src = cv::imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty());
+
+    // Rotate by 30 degrees about (100, 100) and scale by 0.5
+    const float angle = 30.0;
+    const float scale = 0.5;
+    cv::Mat affine = cv::getRotationMatrix2D(cv::Point2f(100, 100), angle, scale);
+
+    // The FastCV call below receives the inverted transform
+    cv::Mat inverseAffine = getInverseAffine(affine);
+
+    // Destination keeps the source dimensions
+    cv::Size dsize(src.cols, src.rows);
+    cv::Mat dst;
+
+    // Warp with the parameterised interpolation and border value
+    cv::fastcv::warpAffine(src, dst, inverseAffine, dsize, interpolation, borderValue);
+
+    // Smoke check only: the call must have produced an output image;
+    // pixel values are not compared against a reference here
+    EXPECT_FALSE(dst.empty());
+}
+
+// Run WarpAffineTest for every (interpolation, border value) combination
+INSTANTIATE_TEST_CASE_P(FastCV_Extension, WarpAffineTest,
+    ::testing::Combine(
+        // interpolation flags under test
+        ::testing::Values(INTER_NEAREST, INTER_LINEAR, INTER_AREA),
+        // constant border values: black and white
+        ::testing::Values(0, 255)
+));
+
 }
 }
\ No newline at end of file