diff --git a/modules/fastcv/include/opencv2/fastcv.hpp b/modules/fastcv/include/opencv2/fastcv.hpp
index 292e83a2dc3..6626c4c9b5a 100644
--- a/modules/fastcv/include/opencv2/fastcv.hpp
+++ b/modules/fastcv/include/opencv2/fastcv.hpp
@@ -30,6 +30,13 @@
 #include "opencv2/fastcv/thresh.hpp"
 #include "opencv2/fastcv/tracking.hpp"
 #include "opencv2/fastcv/warp.hpp"
+#include "opencv2/fastcv/allocator.hpp"
+#include "opencv2/fastcv/dsp_init.hpp"
+#include "opencv2/fastcv/sad_dsp.hpp"
+#include "opencv2/fastcv/thresh_dsp.hpp"
+#include "opencv2/fastcv/fft_dsp.hpp"
+#include "opencv2/fastcv/edges_dsp.hpp"
+#include "opencv2/fastcv/blur_dsp.hpp"
 
 /**
  * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions
diff --git a/modules/fastcv/include/opencv2/fastcv/allocator.hpp b/modules/fastcv/include/opencv2/fastcv/allocator.hpp
new file mode 100644
index 00000000000..a70666723ca
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/allocator.hpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_ALLOCATOR_HPP
+#define OPENCV_FASTCV_ALLOCATOR_HPP
+
+#include <opencv2/core.hpp>
+#include <set>
+#include <mutex>
+
+namespace cv {
+namespace fastcv {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Resource manager for FastCV allocations.
+ * This class manages active allocations; it is reachable only through getInstance() (the constructor is private).
+ */
+class QcResourceManager {
+public:
+    static QcResourceManager& getInstance(); //!< Access point for the manager object
+
+    void addAllocation(void* ptr);    //!< NOTE(review): presumably records @p ptr in activeAllocations - confirm in the .cpp
+    void removeAllocation(void* ptr); //!< NOTE(review): presumably drops @p ptr from activeAllocations - confirm in the .cpp
+
+private:
+    QcResourceManager() = default;     // private so that instances are only created by the class itself
+    std::set<void*> activeAllocations; // raw pointers of the allocations currently considered active
+    std::mutex resourceMutex;          // NOTE(review): presumably guards activeAllocations - confirm in the .cpp
+};
+
+/**
+ * @brief Qualcomm's custom allocator.
+ * This allocator uses Qualcomm's memory management functions. Assign getQcAllocator() to cv::Mat::allocator before create().
+ */
+class QcAllocator : public cv::MatAllocator {
+    public:
+        QcAllocator();
+        ~QcAllocator();
+    
+        cv::UMatData* allocate(int dims, const int* sizes, int type, void* data0, size_t* step, cv::AccessFlag flags, cv::UMatUsageFlags usageFlags) const CV_OVERRIDE;
+        bool allocate(cv::UMatData* u, cv::AccessFlag accessFlags, cv::UMatUsageFlags usageFlags) const CV_OVERRIDE;
+        void deallocate(cv::UMatData* u) const CV_OVERRIDE;
+};
+
+/**
+ * @brief Gets the default Qualcomm's allocator.
+ * This function returns a pointer to the default Qualcomm's allocator, which is optimized
+ * for use with DSP.
+ *
+ * @return Pointer to the default FastCV allocator.
+ */
+CV_EXPORTS cv::MatAllocator* getQcAllocator();
+
+//! @}
+
+} // namespace fastcv
+} // namespace cv
+
+#endif // OPENCV_FASTCV_ALLOCATOR_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp
new file mode 100644
index 00000000000..1228bdde458
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_BLUR_DSP_HPP
+#define OPENCV_FASTCV_BLUR_DSP_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Filter an image with a non-separable kernel
+ * @param _src Input image with type CV_8UC1, src size should be greater than 176*144
+ * @param _dst Output image with type CV_8UC1, CV_16SC1 or CV_32FC1
+ * @param ddepth The depth of the output image
+ * @param _kernel Filter kernel data
+ */
+CV_EXPORTS void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel);
+
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_BLUR_DSP_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp b/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp
new file mode 100644
index 00000000000..942b7fdaa95
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_DSP_INIT_HPP
+#define OPENCV_FASTCV_DSP_INIT_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Initializes the FastCV DSP environment.
+ * 
+ * This function sets up the necessary environment and resources for the DSP to operate.
+ * It must be called once at the very beginning of the use case or program to ensure that 
+ * the DSP is properly initialized before any DSP-related operations are performed.
+ *
+ * @note This function must be called at the start of the use case or program, before any 
+ *       DSP-related operations.
+ * 
+ * @return int Returns 0 on success, and a non-zero value on failure.
+ */
+CV_EXPORTS int fcvdspinit();
+
+/**
+ * @brief Deinitializes the FastCV DSP environment.
+ * 
+ * This function releases the resources and environment set up by the 'fcvdspinit' function.
+ * It should be called before the use case or program exits to ensure that all DSP resources 
+ * are properly cleaned up and no memory leaks occur.
+ *
+ * @note This function must be called at the end of the use case or program, after all DSP-related 
+ *       operations are complete.
+ */
+CV_EXPORTS void fcvdspdeinit();
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_DSP_INIT_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp
new file mode 100644
index 00000000000..37b2aef5515
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_EDGES_DSP_HPP
+#define OPENCV_FASTCV_EDGES_DSP_HPP
+
+#include "opencv2/core/mat.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+/**
+* @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions
+*/
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Canny edge detector applied to a 8 bit grayscale image
+ * @param _src          Input image with type CV_8UC1
+ * @param _dst          Output 8-bit image containing the edge detection results
+ * @param lowThreshold  First threshold
+ * @param highThreshold Second threshold
+ * @param apertureSize  The Sobel kernel size for calculating gradient. Supported sizes are 3, 5 and 7.
+ * @param L2gradient    L2 Gradient or L1 Gradient
+*/
+CV_EXPORTS void Canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize = 3, bool L2gradient = false);
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif //OPENCV_FASTCV_EDGES_DSP_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp
new file mode 100644
index 00000000000..b4e4e44ecdc
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_FFT_DSP_HPP
+#define OPENCV_FASTCV_FFT_DSP_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+* @brief Computes the 1D or 2D Fast Fourier Transform of a real valued matrix.
+        For the 2D case, the width and height of the input and output matrix must be powers of 2.
+        For the 1D case, the height of the matrices must be 1, while the width must be a power of 2.
+
+* @param src Input array of CV_8UC1. The dimensions of the matrix must be powers of 2 for the 2D case,
+            and in the 1D case, the height must be 1, while the width must be a power of 2.
+* @param dst The computed FFT matrix of type CV_32FC2. The FFT Re and Im coefficients are stored in different channels.
+            Hence the dimensions of the dst are (srcWidth, srcHeight)
+*/
+CV_EXPORTS void FFT(InputArray src, OutputArray dst);
+
+/**
+* @brief Computes the 1D or 2D Inverse Fast Fourier Transform of a complex valued matrix.
+        For the 2D case, the width and height of the input and output matrix must be powers of 2.
+        For the 1D case, the height of the matrices must be 1, while the width must be a power of 2.
+
+* @param src Input array of type CV_32FC2 containing FFT Re and Im coefficients stored in separate channels.
+            The dimensions of the matrix must be powers of 2 for the 2D case, and in the 1D case, the height must be 1,
+            while the width must be a power of 2.
+* @param dst The computed IFFT matrix of type CV_8U. The matrix is real valued and has no imaginary components.
+            Hence the dimensions of the dst are (srcWidth, srcHeight)
+*/
+CV_EXPORTS void IFFT(InputArray src, OutputArray dst);
+
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_FFT_DSP_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp
new file mode 100644
index 00000000000..b9ae9079686
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_SAD_DSP_HPP
+#define OPENCV_FASTCV_SAD_DSP_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+/**
+ * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions
+ */
+
+//! @addtogroup fastcv
+//! @{
+/**
+ * @brief Sum of absolute differences of an image against an 8x8 template.
+ * @param _patch The 8x8 template image data, type CV_8UC1
+ * @param _src The input image data, type CV_8UC1
+ * @param _dst The output image data, type CV_16UC1
+*/
+CV_EXPORTS void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst);
+//! @}
+
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_SAD_DSP_HPP
diff --git a/modules/fastcv/include/opencv2/fastcv/scale.hpp b/modules/fastcv/include/opencv2/fastcv/scale.hpp
index 276b2304050..7e18ce81edd 100644
--- a/modules/fastcv/include/opencv2/fastcv/scale.hpp
+++ b/modules/fastcv/include/opencv2/fastcv/scale.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -15,20 +15,18 @@ namespace fastcv {
 //! @{
 
 /**
- * @brief Down-scale the image by averaging each 2x2 pixel block.
- * 		  This function is not bit-exact with cv::resize but provides faster execution time on Qualcomm's processor.
- * @param _src The first input image data, type CV_8UC1, src height must be a multiple of 2
- * @param _dst The output image data, type CV_8UC1
-*/
-CV_EXPORTS_W void resizeDownBy2(cv::InputArray _src, cv::OutputArray _dst);
-
-/**
- * @brief Down-scale the image by averaging each 4x4 pixel block.
- * 		  This function is not bit-exact with cv::resize but provides faster execution time on Qualcomm's processor.
- * @param _src The first input image data, type CV_8UC1, src height must be a multiple of 4
- * @param _dst The output image data, type CV_8UC1
-*/
-CV_EXPORTS_W void resizeDownBy4(cv::InputArray _src, cv::OutputArray _dst);
+ * @brief Down-scales the image using specified scaling factors or dimensions.
+ *        This function supports both single-channel (CV_8UC1) and two-channel (CV_8UC2) images.
+ * 
+ * @param _src The input image data, type CV_8UC1 or CV_8UC2.
+ * @param _dst The output image data, type CV_8UC1 or CV_8UC2.
+ * @param dsize The desired size of the output image. If empty, it is calculated using inv_scale_x and inv_scale_y.
+ * @param inv_scale_x The inverse scaling factor for the width. If dsize is provided, this parameter is ignored.
+ * @param inv_scale_y The inverse scaling factor for the height. If dsize is provided, this parameter is ignored.
+ * 
+ * @note If dsize is not specified, inv_scale_x and inv_scale_y must be strictly positive.
+ */
+CV_EXPORTS_W void resizeDown(cv::InputArray _src, cv::OutputArray _dst, Size dsize, double inv_scale_x, double inv_scale_y);
 
 //! @}
 
diff --git a/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp
new file mode 100644
index 00000000000..25824e72097
--- /dev/null
+++ b/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#ifndef OPENCV_FASTCV_THRESH_DSP_HPP
+#define OPENCV_FASTCV_THRESH_DSP_HPP
+
+#include <opencv2/core.hpp>
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+//! @addtogroup fastcv
+//! @{
+
+/**
+ * @brief Binarizes a grayscale image using Otsu's method.
+ *        Sets the pixel to max(255) if its value is greater than the threshold;
+ *        else, sets the pixel to min(0). The threshold is chosen so that it minimizes
+ *        the intra-class variance (the variance within the class).
+ *
+ * @param _src Input 8-bit grayscale image. Size of buffer is srcStride*srcHeight bytes.
+ * @param _dst Output 8-bit binarized image. Size of buffer is dstStride*srcHeight bytes.
+ * @param type Threshold type that can be either 0 or 1.
+ *             NOTE: For threshold type=0, the pixel is set as
+ *             maxValue if its value is greater than the threshold; else, it is set as zero.
+ *             For threshold type=1, the pixel is set as zero if its
+ *             value is greater than the threshold; else, it is set as maxValue.
+ */
+CV_EXPORTS void thresholdOtsu(InputArray _src, OutputArray _dst, bool type);
+
+//! @}
+} // dsp::
+} // fastcv::
+} // cv::
+
+#endif // OPENCV_FASTCV_THRESH_DSP_HPP
\ No newline at end of file
diff --git a/modules/fastcv/include/opencv2/fastcv/warp.hpp b/modules/fastcv/include/opencv2/fastcv/warp.hpp
index 8f58cd36577..dae1a72bcc3 100644
--- a/modules/fastcv/include/opencv2/fastcv/warp.hpp
+++ b/modules/fastcv/include/opencv2/fastcv/warp.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -17,6 +17,20 @@ namespace fastcv {
 //! @addtogroup fastcv
 //! @{
 
+/**
+ * @brief   Transform an image using perspective transformation, same as cv::warpPerspective but not bit-exact.
+ * @param _src          Input 8-bit image.
+ * @param _dst          Output 8-bit image.
+ * @param _M0           3x3 perspective transformation matrix.
+ * @param dsize         Size of the output image.
+ * @param interpolation Interpolation method. Only cv::INTER_NEAREST, cv::INTER_LINEAR and cv::INTER_AREA are supported.
+ * @param borderType    Pixel extrapolation method. Only cv::BORDER_CONSTANT, cv::BORDER_REPLICATE and cv::BORDER_TRANSPARENT
+ *                      are supported.
+ * @param borderValue   Value used in case of a constant border.
+ */
+CV_EXPORTS_W void warpPerspective(InputArray _src, OutputArray _dst, InputArray _M0, Size dsize, int interpolation, int borderType,
+    const Scalar&  borderValue);
+
 /**
  * @brief Perspective warp two images using the same transformation. Bi-linear interpolation is used where applicable.
  *        For example, to warp a grayscale image and an alpha image at the same time, or warp two color channels.
@@ -30,6 +44,46 @@ namespace fastcv {
 CV_EXPORTS_W void warpPerspective2Plane(InputArray _src1, InputArray _src2, OutputArray _dst1, OutputArray _dst2,
     InputArray _M0, Size dsize);
 
+/**
+ * @brief Performs an affine transformation on an input image using a provided transformation matrix.
+ *
+ * This function performs two types of operations based on the transformation matrix:
+ *
+ * 1. Standard Affine Transformation (2x3 matrix):
+ *    - Transforms the entire input image using the affine matrix
+ *    - Supports both CV_8UC1 and CV_8UC3 types
+ *
+ * 2. Patch Extraction with Transformation (2x2 matrix):
+ *    - Extracts and transforms a patch from the input image
+ *    - Only supports CV_8UC1 type
+ *    - If input is a ROI: patch is extracted from ROI center in the original image
+ *    - If input is full image: patch is extracted from image center
+ *
+ * @param _src              Input image. Supported formats:
+ *                          - CV_8UC1: 8-bit single-channel
+ *                          - CV_8UC3: 8-bit three-channel - only for 2x3 matrix
+ * @param _dst              Output image. Will have the same type as src and size specified by dsize
+ * @param _M                2x2/2x3 affine transformation matrix (inverse map)
+ * @param dsize             Output size:
+ *                          - For 2x3 matrix: Size of the output image
+ *                          - For 2x2 matrix: Size of the extracted patch
+ * @param interpolation     Interpolation method. Only applicable for 2x3 transformation with CV_8UC1 input.
+ *                          Options:
+ *                          - INTER_NEAREST: Nearest-neighbor interpolation
+ *                          - INTER_LINEAR: Bilinear interpolation (default)
+ *                          - INTER_AREA: Area-based interpolation
+ *                          - INTER_CUBIC: Bicubic interpolation
+ *                          Note: CV_8UC3 input always uses bicubic interpolation internally
+ * @param borderValue       Constant pixel value for border pixels. Only applicable for 2x3 transformations
+ *                          with single-channel input.
+ *
+ * @note                    The affine matrix follows the inverse mapping convention, applied to destination coordinates
+ *                          to produce corresponding source coordinates.
+ * @note                    The function uses 'FASTCV_BORDER_CONSTANT' for border handling, with the specified 'borderValue'.
+*/
+CV_EXPORTS_W void warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize, int interpolation = INTER_LINEAR, 
+                            int borderValue = 0);
+
 //! @}
 
 }
diff --git a/modules/fastcv/perf/perf_blur_dsp.cpp b/modules/fastcv/perf/perf_blur_dsp.cpp
new file mode 100644
index 00000000000..133ba85cd56
--- /dev/null
+++ b/modules/fastcv/perf/perf_blur_dsp.cpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef perf::TestBaseWithParam<tuple<Size, int, int>> Filter2DPerfTest_DSP;
+
+PERF_TEST_P(Filter2DPerfTest_DSP, run,
+    ::testing::Combine(::testing::Values(perf::szVGA, perf::sz720p),                // image size
+                       ::testing::Values(CV_8U,CV_16S,CV_32F),                      // dst image depth
+                       ::testing::Values(3, 5, 7)                                   // kernel size
+                       )
+           )
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before any cv::fastcv::dsp call
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    cv::Size srcSize = get<0>(GetParam());
+    int ddepth = get<1>(GetParam());
+    int ksize = get<2>(GetParam());
+
+    cv::Mat src;
+    src.allocator = cv::fastcv::getQcAllocator();
+    src.create(srcSize, CV_8U);
+
+    cv::Mat kernel;
+    cv::Mat dst;
+    kernel.allocator = cv::fastcv::getQcAllocator();
+    dst.allocator = cv::fastcv::getQcAllocator();
+
+    switch (ddepth)
+    {
+        case CV_8U:
+        case CV_16S:
+        {
+            kernel.create(ksize,ksize,CV_8S);
+            break;
+        }
+        case CV_32F:
+        {
+            kernel.create(ksize,ksize,CV_32F);
+            break;
+        }
+        default:
+            break;
+    }
+
+    // Fill the inputs exactly once. The source used to be randomized a second time
+    // through cvtest::randUni, which only overwrote the data produced here.
+    cv::randu(src, 0, 256);
+    cv::randu(kernel, INT8_MIN, INT8_MAX);
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::dsp::filter2D(src, dst, ddepth, kernel);
+        stopTimer();
+    }
+
+    // Release the DSP resources acquired by fcvdspinit()
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
diff --git a/modules/fastcv/perf/perf_edges_dsp.cpp b/modules/fastcv/perf/perf_edges_dsp.cpp
new file mode 100644
index 00000000000..02f6e570ab8
--- /dev/null
+++ b/modules/fastcv/perf/perf_edges_dsp.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef perf::TestBaseWithParam<tuple<Size, int, pair<int, int>, bool>> CannyPerfTest;
+
+PERF_TEST_P(CannyPerfTest, run,
+    ::testing::Combine(
+        ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p),                  // source image size
+        ::testing::Values(3, 5, 7),                                                   // Sobel aperture size
+        ::testing::Values(make_pair(0, 50), make_pair(100, 150), make_pair(50, 150)), // (low, high) thresholds
+        ::testing::Values(false, true)                                                // L2gradient flag
+    )
+)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before any cv::fastcv::dsp call
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const auto params = GetParam();
+    const cv::Size imageSize = get<0>(params);
+    const int aperture = get<1>(params);
+    const int lowThresh = get<2>(params).first;
+    const int highThresh = get<2>(params).second;
+    const bool useL2 = get<3>(params);
+
+    // Random 8-bit source image backed by the Qualcomm allocator
+    cv::Mat input;
+    input.allocator = cv::fastcv::getQcAllocator();
+    input.create(imageSize, CV_8UC1);
+    cv::randu(input, 0, 256);
+
+    cv::Mat edges;
+    edges.allocator = cv::fastcv::getQcAllocator();
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::dsp::Canny(input, edges, lowThresh, highThresh, aperture, useL2);
+        stopTimer();
+    }
+
+    // Release the DSP resources acquired by fcvdspinit()
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+} //namespace
diff --git a/modules/fastcv/perf/perf_fft_dsp.cpp b/modules/fastcv/perf/perf_fft_dsp.cpp
new file mode 100644
index 00000000000..468a92aa4ac
--- /dev/null
+++ b/modules/fastcv/perf/perf_fft_dsp.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef perf::TestBaseWithParam<cv::Size> FFT_DSPExtPerfTest;
+
+PERF_TEST_P_(FFT_DSPExtPerfTest, forward)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before any cv::fastcv::dsp call
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const Size imageSize = GetParam();
+
+    // Random 8-bit input backed by the Qualcomm allocator
+    Mat input;
+    input.allocator = cv::fastcv::getQcAllocator();
+    input.create(imageSize, CV_8UC1);
+    cvtest::randUni(cv::theRNG(), input, Scalar::all(0), Scalar::all(256));
+
+    // Receives the forward transform
+    Mat spectrum;
+    spectrum.allocator = cv::fastcv::getQcAllocator();
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::dsp::FFT(input, spectrum);
+        stopTimer();
+    }
+
+    // Release the DSP resources acquired by fcvdspinit()
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+PERF_TEST_P_(FFT_DSPExtPerfTest, inverse)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before any cv::fastcv::dsp call
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const Size imageSize = GetParam();
+
+    // Random 8-bit input backed by the Qualcomm allocator
+    Mat input;
+    input.allocator = cv::fastcv::getQcAllocator();
+    input.create(imageSize, CV_8UC1);
+    cvtest::randUni(cv::theRNG(), input, Scalar::all(0), Scalar::all(256));
+
+    // The forward transform is computed once, outside the timed loop
+    Mat spectrum;
+    spectrum.allocator = cv::fastcv::getQcAllocator();
+    cv::fastcv::dsp::FFT(input, spectrum);
+
+    Mat restored;
+    restored.allocator = cv::fastcv::getQcAllocator();
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::dsp::IFFT(spectrum, restored);
+        stopTimer();
+    }
+
+    // Release the DSP resources acquired by fcvdspinit()
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+INSTANTIATE_TEST_CASE_P(FastCV_Extension, FFT_DSPExtPerfTest,
+    ::testing::Values(Size(256, 256), Size(512, 512)));
+
+} // namespace
diff --git a/modules/fastcv/perf/perf_main.cpp b/modules/fastcv/perf/perf_main.cpp
index a6824dfb007..b43a0a3d84b 100644
--- a/modules/fastcv/perf/perf_main.cpp
+++ b/modules/fastcv/perf/perf_main.cpp
@@ -5,4 +5,9 @@
 
 #include "perf_precomp.hpp"
 
-CV_PERF_TEST_MAIN(imgproc)
+static void initFastCVTests()
+{
+    cvtest::registerGlobalSkipTag(CV_TEST_TAG_FASTCV_SKIP_DSP); // the tag the DSP perf tests pass to applyTestTag()
+}
+
+CV_PERF_TEST_MAIN(imgproc, initFastCVTests())
diff --git a/modules/fastcv/perf/perf_precomp.hpp b/modules/fastcv/perf/perf_precomp.hpp
index e052a0098e2..0a229f70e08 100644
--- a/modules/fastcv/perf/perf_precomp.hpp
+++ b/modules/fastcv/perf/perf_precomp.hpp
@@ -14,4 +14,6 @@ namespace opencv_test {
 using namespace perf;
 } // namespace
 
+#define CV_TEST_TAG_FASTCV_SKIP_DSP "fastcv_skip_dsp"
+
 #endif
diff --git a/modules/fastcv/perf/perf_sad_dsp.cpp b/modules/fastcv/perf/perf_sad_dsp.cpp
new file mode 100644
index 00000000000..0acd730efd4
--- /dev/null
+++ b/modules/fastcv/perf/perf_sad_dsp.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef std::tuple<cv::Size /*srcSize*/> SumOfAbsDiffsPerfParams;
+typedef perf::TestBaseWithParam<SumOfAbsDiffsPerfParams> SumOfAbsDiffsPerfTest;
+
+PERF_TEST_P(SumOfAbsDiffsPerfTest, run,
+    ::testing::Values(cv::Size(640, 480),  // VGA
+        cv::Size(1280, 720),               // 720p
+        cv::Size(1920, 1080))              // 1080p
+)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // Initialize FastCV DSP
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    cv::Size srcSize = std::get<0>(GetParam());
+    RNG& rng = cv::theRNG();
+
+    cv::Mat patch, src;
+    patch.allocator = cv::fastcv::getQcAllocator(); // Use FastCV allocator for patch
+    src.allocator = cv::fastcv::getQcAllocator(); // Use FastCV allocator for src
+    patch.create(8, 8, CV_8UC1);
+    src.create(srcSize, CV_8UC1);
+
+    cvtest::randUni(rng, patch, cv::Scalar::all(0), cv::Scalar::all(255));
+    cvtest::randUni(rng, src, cv::Scalar::all(0), cv::Scalar::all(255));
+
+    cv::Mat dst;
+    dst.allocator = cv::fastcv::getQcAllocator(); // Use FastCV allocator for dst
+
+    while(next())
+    {
+        startTimer();
+        cv::fastcv::dsp::sumOfAbsoluteDiffs(patch, src, dst);
+        stopTimer();
+    }
+
+    // De-initialize FastCV DSP: every fcvdspinit() must be paired with fcvdspdeinit()
+    cv::fastcv::dsp::fcvdspdeinit();
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
diff --git a/modules/fastcv/perf/perf_scale.cpp b/modules/fastcv/perf/perf_scale.cpp
new file mode 100644
index 00000000000..e9975d51a96
--- /dev/null
+++ b/modules/fastcv/perf/perf_scale.cpp
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef perf::TestBaseWithParam<std::tuple<Size, int>> ResizePerfTest;
+
+PERF_TEST_P(ResizePerfTest, run, ::testing::Combine(
+    ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // source image size
+    ::testing::Values(2, 4)                                      // integer down-scale factor
+))
+{
+    const Size srcSize = std::get<0>(GetParam());
+    const int divisor = std::get<1>(GetParam());
+
+    cv::Mat source(srcSize, CV_8UC1);
+    cv::randu(source, cv::Scalar::all(0), cv::Scalar::all(255));
+    // The target size is given explicitly, so both scale factors are passed as 0
+    const Size dstSize(source.cols / divisor, source.rows / divisor);
+    cv::Mat scaled;
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::resizeDown(source, scaled, dstSize, 0, 0);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+typedef perf::TestBaseWithParam<std::tuple<Size, double, double, int>> ResizeByMnPerfTest;
+
+PERF_TEST_P(ResizeByMnPerfTest, run, ::testing::Combine(
+    ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // source image size
+    ::testing::Values(0.35, 0.65),                               // inv_scale_x
+    ::testing::Values(0.35, 0.65),                               // inv_scale_y
+    ::testing::Values(CV_8UC1, CV_8UC2)                          // image type
+))
+{
+    const Size srcSize = std::get<0>(GetParam());
+    const double scaleX = std::get<1>(GetParam());
+    const double scaleY = std::get<2>(GetParam());
+    const int matType = std::get<3>(GetParam());
+
+    cv::Mat source(srcSize, matType);
+    cv::randu(source, cv::Scalar::all(0), cv::Scalar::all(255));
+
+    const Size autoSize; // left empty so that the output size comes from the scale factors
+    cv::Mat scaled;
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::resizeDown(source, scaled, autoSize, scaleX, scaleY);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
\ No newline at end of file
diff --git a/modules/fastcv/perf/perf_thresh_dsp.cpp b/modules/fastcv/perf/perf_thresh_dsp.cpp
new file mode 100644
index 00000000000..452b9464db1
--- /dev/null
+++ b/modules/fastcv/perf/perf_thresh_dsp.cpp
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "perf_precomp.hpp"
+
+namespace opencv_test {
+
+typedef std::tuple<cv::Size, bool /*type*/> ThresholdOtsuPerfParams;
+typedef perf::TestBaseWithParam<ThresholdOtsuPerfParams> ThresholdOtsuPerfTest;
+
+PERF_TEST_P(ThresholdOtsuPerfTest, run,
+    ::testing::Combine(::testing::Values(Size(320, 240), Size(640, 480), Size(1280, 720), Size(1920, 1080)),
+        ::testing::Values(false, true) // threshold type
+    )
+)
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // The DSP has to be initialized before any cv::fastcv::dsp call
+    const int dspStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(dspStatus, 0) << "Failed to initialize FastCV DSP";
+
+    const cv::Size imageSize = std::get<0>(GetParam());
+    const bool threshType = std::get<1>(GetParam());
+
+    // Random 8-bit grayscale input backed by the Qualcomm allocator
+    cv::Mat gray;
+    gray.allocator = cv::fastcv::getQcAllocator();
+    gray.create(imageSize, CV_8UC1);
+
+    cvtest::randUni(cv::theRNG(), gray, Scalar::all(0), Scalar::all(256));
+
+    // Receives the binarized image
+    cv::Mat binary;
+    binary.allocator = cv::fastcv::getQcAllocator();
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::dsp::thresholdOtsu(gray, binary, threshType);
+        stopTimer();
+    }
+
+    // Release the DSP resources acquired by fcvdspinit()
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    SANITY_CHECK_NOTHING();
+}
+
+} // namespace
diff --git a/modules/fastcv/perf/perf_warp.cpp b/modules/fastcv/perf/perf_warp.cpp
index 231056aef56..008c1e100a7 100644
--- a/modules/fastcv/perf/perf_warp.cpp
+++ b/modules/fastcv/perf/perf_warp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -7,31 +7,19 @@
 
 namespace opencv_test {
 
-typedef perf::TestBaseWithParam<Size> WarpPerspective2PlanePerfTest;
-
-PERF_TEST_P(WarpPerspective2PlanePerfTest, run,
-    ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p))
+static void getInvertMatrix(Mat& src, Size dstSize, Mat& M)
 {
-    cv::Size dstSize = GetParam();
-    cv::Mat img = imread(cvtest::findDataFile("cv/shared/baboon.png"));
-    Mat src(img.rows, img.cols, CV_8UC1);
-    cvtColor(img,src,cv::COLOR_BGR2GRAY);
-    cv::Mat dst1, dst2, mat;
-    mat.create(3,3,CV_32FC1);
-    dst1.create(dstSize,CV_8UC1);
-    dst2.create(dstSize,CV_8UC1);
-
     RNG& rng = cv::theRNG();
     Point2f s[4], d[4];
 
     s[0] = Point2f(0,0);
     d[0] = Point2f(0,0);
     s[1] = Point2f(src.cols-1.f,0);
-    d[1] = Point2f(dst1.cols-1.f,0);
+    d[1] = Point2f(dstSize.width-1.f,0);
     s[2] = Point2f(src.cols-1.f,src.rows-1.f);
-    d[2] = Point2f(dst1.cols-1.f,dst1.rows-1.f);
+    d[2] = Point2f(dstSize.width-1.f,dstSize.height-1.f);
     s[3] = Point2f(0,src.rows-1.f);
-    d[3] = Point2f(0,dst1.rows-1.f);
+    d[3] = Point2f(0,dstSize.height-1.f);
 
     float buffer[16];
     Mat tmp( 1, 16, CV_32FC1, buffer );
@@ -41,18 +29,204 @@ PERF_TEST_P(WarpPerspective2PlanePerfTest, run,
     {
         s[i].x += buffer[i*4]*src.cols/2;
         s[i].y += buffer[i*4+1]*src.rows/2;
-        d[i].x += buffer[i*4+2]*dst1.cols/2;
-        d[i].y += buffer[i*4+3]*dst1.rows/2;
+        d[i].x += buffer[i*4+2]*dstSize.width/2;
+        d[i].y += buffer[i*4+3]*dstSize.height/2;
     }
 
-    cv::getPerspectiveTransform( s, d ).convertTo( mat, mat.depth() );
+    cv::getPerspectiveTransform( s, d ).convertTo( M, M.depth() );
+
     // Invert the perspective matrix
-    invert(mat,mat);
+    invert(M,M);
+}
+
+static cv::Mat getInverseAffine(const cv::Mat& affine)
+{
+    // Split the forward transform [A | t] into its linear part A (left 2x2)
+    // and its translation column t (right 2x1).
+    const cv::Rect linearRect(0, 0, 2, 2);
+    const cv::Rect shiftRect(2, 0, 1, 2);
+    cv::Mat linear = affine(linearRect);
+    cv::Mat shift  = affine(shiftRect);
+
+    // Inverse of the linear part: A^-1
+    cv::Mat linearInv;
+    cv::invert(linear, linearInv);
+
+    // Translation of the inverse transform: -A^-1 * t
+    cv::Mat shiftInv = -linearInv * shift;
+
+    // Assemble [A^-1 | -A^-1 * t] into a fresh 2x3 CV_32F matrix
+    cv::Mat result = cv::Mat::zeros(2, 3, CV_32F);
+    linearInv.copyTo(result(linearRect));
+    shiftInv.copyTo(result(shiftRect));
+    return result;
+}
+
+typedef perf::TestBaseWithParam<Size> WarpPerspective2PlanePerfTest;
+
+PERF_TEST_P(WarpPerspective2PlanePerfTest, run,
+    ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p))
+{
+    // The destination size is the test parameter; the source is baboon.png converted to gray.
+    const cv::Size dstSize = GetParam();
+    cv::Mat img = imread(cvtest::findDataFile("cv/shared/baboon.png"));
+    Mat src(img.rows, img.cols, CV_8UC1);
+    cvtColor(img, src, cv::COLOR_BGR2GRAY);
+
+    // 3x3 CV_32FC1 matrix filled by getInvertMatrix(); the same image is fed as both planes.
+    cv::Mat matrix(3, 3, CV_32FC1);
+    getInvertMatrix(src, dstSize, matrix);
+    cv::Mat dst1, dst2;
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::warpPerspective2Plane(src, src, dst1, dst2, matrix, dstSize);
+        stopTimer();
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+typedef perf::TestBaseWithParam<tuple<Size, int, int>> WarpPerspectivePerfTest;
+
+PERF_TEST_P(WarpPerspectivePerfTest, run,
+    ::testing::Combine( ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p),
+                        ::testing::Values(INTER_NEAREST, INTER_LINEAR, INTER_AREA),
+                        ::testing::Values(BORDER_CONSTANT, BORDER_REPLICATE, BORDER_TRANSPARENT)))
+{
+    // Parameters: destination size, interpolation mode, border mode.
+    cv::Size dstSize = get<0>(GetParam());
+    int interpolation = get<1>(GetParam());
+    int borderType    = get<2>(GetParam());
+    cv::Scalar borderValue = Scalar::all(100);
+
+    cv::Mat src = imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    // Fatal check: the matrix setup and the warp below make no sense on an empty image.
+    ASSERT_FALSE(src.empty());
+
+    cv::Mat dst, matrix;
+    matrix.create(3, 3, CV_32FC1);
+    getInvertMatrix(src, dstSize, matrix);
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::warpPerspective(src, dst, matrix, dstSize, interpolation, borderType, borderValue);
+        stopTimer();
+    }
+    SANITY_CHECK_NOTHING();
+}
+
+typedef TestBaseWithParam< tuple<MatType, Size> > WarpAffine3ChannelPerf;
+
+PERF_TEST_P(WarpAffine3ChannelPerf, run, Combine(
+            Values(CV_8UC3),
+            Values( szVGA, sz720p, sz1080p)
+))
+{
+    // Parameters: element type of both images and the destination size.
+    // The source is always a 512x512 gradient image.
+    Size sz, szSrc(512, 512);
+    int dataType;
+    dataType   = get<0>(GetParam());
+    sz         = get<1>(GetParam());
+
+    cv::Mat src(szSrc, dataType), dst(sz, dataType);
+
+    cvtest::fillGradient(src);
+
+    // Forward affine transform: rotation about (100, 100) combined with scaling
+    float angle = 30.0f; // Rotation angle in degrees
+    float scale = 2.2f;  // Scale factor
+    cv::Mat affine = cv::getRotationMatrix2D(cv::Point2f(100, 100), angle, scale);
+
+    // The matrix handed to cv::fastcv::warpAffine is the inverse of the one above
+    cv::Mat inverseAffine = getInverseAffine(affine);
+
+    declare.in(src).out(dst);
+
+    while (next())
+    {
+        // Only the warp itself is timed
+        startTimer();
+        cv::fastcv::warpAffine(src, dst, inverseAffine, sz);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+typedef perf::TestBaseWithParam<std::tuple<cv::Size, cv::Point2f, cv::Mat>> WarpAffineROIPerfTest;
+
+PERF_TEST_P(WarpAffineROIPerfTest, run, ::testing::Combine(
+    ::testing::Values(cv::Size(50, 50), cv::Size(100, 100)), // patch size
+    ::testing::Values(cv::Point2f(50.0f, 50.0f), cv::Point2f(100.0f, 100.0f)), // position
+    ::testing::Values((cv::Mat_<float>(2, 2) << 1, 0, 0, 1), // identity matrix
+                      (cv::Mat_<float>(2, 2) << cos(CV_PI), -sin(CV_PI), sin(CV_PI), cos(CV_PI))) // rotation matrix
+))
+{
+    cv::Size patchSize = std::get<0>(GetParam());
+    cv::Point2f position = std::get<1>(GetParam());
+    cv::Mat affine = std::get<2>(GetParam());
+
+    cv::Mat src = cv::imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty()); // stop here rather than benchmark an empty ROI
+    // Create ROI with top-left at the specified position
+    cv::Rect roiRect(static_cast<int>(position.x), static_cast<int>(position.y), patchSize.width, patchSize.height);
+
+    // Ensure ROI is within image bounds
+    roiRect = roiRect & cv::Rect(0, 0, src.cols, src.rows);
+    cv::Mat roi = src(roiRect);
+
+    cv::Mat patch;
+
+    while (next())
+    {
+        startTimer();
+        cv::fastcv::warpAffine(roi, patch, affine, patchSize);
+        stopTimer();
+    }
+
+    SANITY_CHECK_NOTHING();
+}
+
+typedef TestBaseWithParam<tuple<int, int> > WarpAffinePerfTest;
+
+PERF_TEST_P(WarpAffinePerfTest, run, ::testing::Combine(
+    ::testing::Values(cv::InterpolationFlags::INTER_NEAREST, cv::InterpolationFlags::INTER_LINEAR, cv::InterpolationFlags::INTER_AREA),
+    ::testing::Values(0, 255) // Black and white borders
+))
+{
+    // Load the source image
+    cv::Mat src = cv::imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty());
+
+    // Generate random values for the affine matrix
+    std::srand(std::time(0));
+    float angle = static_cast<float>(std::rand() % 360); // Random angle between 0 and 360 degrees
+    float scale = static_cast<float>(std::rand() % 200) / 100.0f + 0.5f; // Random scale between 0.5 and 2.5
+    float tx = static_cast<float>(std::rand() % 100) - 50; // Random translation between -50 and 50
+    float ty = static_cast<float>(std::rand() % 100) - 50; // Random translation between -50 and 50
+    float radians = angle * CV_PI / 180.0;
+    cv::Mat affine = (cv::Mat_<float>(2, 3) << scale * cos(radians), -scale * sin(radians), tx,
+                                               scale * sin(radians),  scale * cos(radians), ty);
+
+    // Compute the inverse affine matrix
+    cv::Mat inverseAffine = getInverseAffine(affine);
+
+    // Define the destination size
+    cv::Size dsize(src.cols, src.rows);
+
+    // Define the output matrix
+    cv::Mat dst;
+
+    // Get the parameters
+    int interpolation = std::get<0>(GetParam());
+    int borderValue = std::get<1>(GetParam());
 
     while (next())
     {
         startTimer();
-        cv::fastcv::warpPerspective2Plane(src, src, dst1, dst2, mat, dstSize);
+        cv::fastcv::warpAffine(src, dst, inverseAffine, dsize, interpolation, borderValue);
         stopTimer();
     }
 
diff --git a/modules/fastcv/src/allocator.cpp b/modules/fastcv/src/allocator.cpp
new file mode 100644
index 00000000000..83147d2354a
--- /dev/null
+++ b/modules/fastcv/src/allocator.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+
+QcResourceManager& QcResourceManager::getInstance() {
+    static QcResourceManager sharedManager; // single shared instance, built on the first call
+    return sharedManager;
+}
+
+void QcResourceManager::addAllocation(void* ptr) {
+    std::lock_guard<std::mutex> guard(resourceMutex); // held for the insert and the log below
+    activeAllocations.emplace(ptr);
+    CV_LOG_DEBUG(NULL, cv::format("Active Allocations: %zu", activeAllocations.size()));
+}
+
+void QcResourceManager::removeAllocation(void* ptr) {
+    std::lock_guard<std::mutex> guard(resourceMutex); // held for the erase and the log below
+    activeAllocations.erase(ptr);                     // no effect when ptr is not tracked
+    CV_LOG_DEBUG(NULL, cv::format("Active Allocations: %zu", activeAllocations.size()));
+}
+
+// Empty constructor: no work is performed here.
+QcAllocator::QcAllocator() {}
+
+// Empty destructor: no work is performed here. Buffers handed out by allocate()
+// are released through deallocate(), not from this destructor.
+QcAllocator::~QcAllocator() {}
+
+
+cv::UMatData* QcAllocator::allocate(int dims, const int* sizes, int type,
+                    void* data0, size_t* step, cv::AccessFlag flags,
+                    cv::UMatUsageFlags usageFlags) const
+{
+    CV_UNUSED(flags);
+    CV_UNUSED(usageFlags);
+    // Walk the dimensions innermost-first to fill step[] (caller steps are kept when data0 is given) and get the byte size.
+    size_t total = CV_ELEM_SIZE(type);
+    for( int i = dims-1; i >= 0; i-- )
+    {
+        if( step )
+        {
+            if( data0 && step[i] != CV_AUTOSTEP )
+            {
+                CV_Assert(total <= step[i]);
+                total = step[i];
+            }
+            else
+                step[i] = total;
+        }
+        total *= sizes[i];
+    }
+    uchar* data = data0 ? (uchar*)data0 : (uchar*)fcvHwMemAlloc(total, 16);
+    if( !data ) // fcvHwMemAlloc failed: report it instead of returning a descriptor with a null data pointer
+        CV_Error(cv::Error::StsNoMem, cv::format("Failed to allocate %zu bytes with fcvHwMemAlloc", total));
+    cv::UMatData* u = new cv::UMatData(this);
+    u->data = u->origdata = data;
+    u->size = total;
+    if(data0)
+        u->flags |= cv::UMatData::USER_ALLOCATED;
+    else // track only buffers obtained here; deallocate() removes only those from the active set
+        cv::fastcv::QcResourceManager::getInstance().addAllocation(data);
+    return u;
+}
+
+bool QcAllocator::allocate(cv::UMatData* u, cv::AccessFlag accessFlags, cv::UMatUsageFlags usageFlags) const
+{
+    // Neither flag influences the result: success is reported for any non-null descriptor.
+    CV_UNUSED(accessFlags);
+    CV_UNUSED(usageFlags);
+    return u ? true : false;
+}
+
+void QcAllocator::deallocate(cv::UMatData* u) const
+{
+    if(u == nullptr)
+        return;
+    // Both reference counters must already have dropped to zero.
+    CV_Assert(u->urefcount == 0);
+    CV_Assert(u->refcount == 0);
+    const bool ownsBuffer = !(u->flags & cv::UMatData::USER_ALLOCATED);
+    if( ownsBuffer )
+    {
+        // Free the FastCV buffer, then drop it from the active-allocation set.
+        uchar* buffer = u->origdata;
+        fcvHwMemFree(buffer);
+        cv::fastcv::QcResourceManager::getInstance().removeAllocation(buffer);
+        u->origdata = 0;
+    }
+    delete u;
+}
+
+cv::MatAllocator* getQcAllocator()
+{
+    static QcAllocator* const instance = new QcAllocator; // created on the first call, never deleted
+    return instance;
+}
+
+}
+}
diff --git a/modules/fastcv/src/blur_dsp.cpp b/modules/fastcv/src/blur_dsp.cpp
new file mode 100644
index 00000000000..b6147b54ba3
--- /dev/null
+++ b/modules/fastcv/src/blur_dsp.cpp
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel)
+{
+    CV_Assert(
+        !_src.empty() && 
+        _src.type() == CV_8UC1 && 
+        IS_FASTCV_ALLOCATED(_src.getMat()) && 
+        IS_FASTCV_ALLOCATED(_kernel.getMat())
+    );
+    // Correlates a non-empty CV_8UC1 source with a square, odd-sized kernel; src, kernel and dst must all use the FastCV allocator.
+    Mat kernel = _kernel.getMat();
+
+    Size ksize = kernel.size();
+    CV_Assert(ksize.width == ksize.height);
+    CV_Assert(ksize.width % 2 == 1);
+    // The destination takes the source size and the requested depth.
+    _dst.create(_src.size(), ddepth);
+    Mat src = _src.getMat();
+    Mat dst = _dst.getMat();
+
+    // Check if dst is allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(dst));
+
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+    // Dispatch on output depth. NOTE(review): kernel.data is reinterpreted (int8 for CV_8U/CV_16S, float32 for CV_32F) without a type check - confirm callers pass a matching kernel type.
+    switch (ddepth)
+    {
+        case CV_8U:
+        {
+            if(ksize.width == 3)
+                fcvFilterCorr3x3s8_v2Q((int8_t*)kernel.data, src.data, src.cols, src.rows, src.step, dst.data, dst.step);
+            else
+                fcvFilterCorrNxNu8Q((int8_t*)kernel.data, ksize.width, 0, src.data, src.cols, src.rows, src.step, dst.data, dst.step);
+            
+            break;
+        }
+        case CV_16S:
+        {
+            fcvFilterCorrNxNu8s16Q((int8_t*)kernel.data, ksize.width, 0, src.data, src.cols, src.rows, src.step, (int16_t*)dst.data, dst.step);
+            break;
+        }
+        case CV_32F:
+        {
+            fcvFilterCorrNxNu8f32Q((float32_t*)kernel.data, ksize.width, src.data, src.cols, src.rows, src.step, (float32_t*)dst.data, dst.step);
+            break;
+        }
+        default: // any other depth is rejected (note: _dst has already been created at this point)
+        {
+            CV_Error(cv::Error::StsBadArg, cv::format("Kernel Size:%d, Dst type:%s is not supported", ksize.width,
+                depthToString(ddepth)));
+        }
+    }
+}
+
+} // dsp::
+} // fastcv::
+} // cv::
\ No newline at end of file
diff --git a/modules/fastcv/src/dsp_init.cpp b/modules/fastcv/src/dsp_init.cpp
new file mode 100644
index 00000000000..ee0bff8ba1d
--- /dev/null
+++ b/modules/fastcv/src/dsp_init.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+// Entry points for initializing and de-initializing the FastCV Q6 DSP
+int fcvdspinit()
+{
+    // Returns 0 when the DSP is (or already was) initialized and -1 when initialization fails.
+    FastCvDspContext& dspContext = FastCvDspContext::getContext();
+    if (dspContext.isInitialized()) {
+        CV_LOG_INFO(NULL, "FastCV DSP already initialized, skipping initialization");
+        return 0;
+    }
+    if (dspContext.initialize() == false) {
+        CV_LOG_ERROR(NULL, "Failed to initialize FastCV DSP");
+        return -1;
+    }
+    CV_LOG_INFO(NULL, "FastCV DSP initialized successfully");
+    return 0;
+}
+
+void fcvdspdeinit()
+{
+    // Deinitialize the DSP environment (does nothing when it is not currently initialized)
+    FastCvDspContext& context = FastCvDspContext::getContext();
+    if (!context.isInitialized()) {
+        CV_LOG_INFO(NULL, "FastCV DSP already deinitialized, skipping deinitialization");
+        return;
+    }
+    if (!context.deinitialize()) {
+        CV_LOG_ERROR(NULL, "Failed to deinitialize FastCV DSP");
+        return; // do not report success after a failed deinitialization
+    }
+    CV_LOG_INFO(NULL, "FastCV DSP deinitialized successfully");
+}
+
+
+} // namespace dsp
+} // namespace fastcv
+} // namespace cv
\ No newline at end of file
diff --git a/modules/fastcv/src/edges_dsp.cpp b/modules/fastcv/src/edges_dsp.cpp
new file mode 100644
index 00000000000..ea121e73d04
--- /dev/null
+++ b/modules/fastcv/src/edges_dsp.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+void Canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize, bool L2gradient)
+{
+    CV_Assert(
+        !_src.empty() && 
+        lowThreshold <= highThreshold &&
+        IS_FASTCV_ALLOCATED(_src.getMat())
+    );
+    // Input must be single-channel 8-bit with a row stride that is a multiple of 8 bytes
+    int type = _src.type();
+    CV_Assert(type == CV_8UC1);
+    CV_Assert(_src.step() % 8 == 0);
+
+    Size size = _src.size();
+    _dst.create(size, type);
+    Mat src = _src.getMat();
+    CV_Assert(src.step >= (size_t)src.cols);
+    CV_Assert(reinterpret_cast<uintptr_t>(src.data) % 8 == 0);
+
+    Mat dst = _dst.getMat();
+    // Check if dst is allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(dst));
+    CV_Assert(reinterpret_cast<uintptr_t>(dst.data) % 8 == 0);
+    CV_Assert(dst.step >= (size_t)src.cols);
+
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+
+    const fcvNormType norm = L2gradient ? FASTCV_NORM_L2 : FASTCV_NORM_L1;
+    // Scratch gradient planes, one int16 per pixel each; gstride (2 * cols) is one such row in bytes
+    const size_t gradBytes = (size_t)src.cols * src.rows * sizeof(int16_t);
+    int16_t* gx = (int16_t*)fcvHwMemAlloc(gradBytes, 16);
+    int16_t* gy = (int16_t*)fcvHwMemAlloc(gradBytes, 16);
+    if (!gx || !gy)
+    {
+        if (gx) fcvHwMemFree(gx);
+        if (gy) fcvHwMemFree(gy);
+        CV_Error(cv::Error::StsNoMem, "Failed to allocate gradient buffers for FastCV Canny");
+    }
+    uint32_t gstride = 2 * src.cols;
+    fcvStatus status = fcvFilterCannyu8Q((uint8_t*)src.data, src.cols, src.rows, src.step, apertureSize, lowThreshold, highThreshold, norm, (uint8_t*)dst.data, dst.step, gx, gy, gstride);
+    fcvHwMemFree(gx);
+    fcvHwMemFree(gy);
+    if (status != FASTCV_SUCCESS)
+    {
+        std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
+        CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
+    }
+}
+
+} // dsp::
+} // fastcv::
+} // cv::
\ No newline at end of file
diff --git a/modules/fastcv/src/fft_dsp.cpp b/modules/fastcv/src/fft_dsp.cpp
new file mode 100644
index 00000000000..f3fd07024ea
--- /dev/null
+++ b/modules/fastcv/src/fft_dsp.cpp
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+static bool isPow2(int x)
+{
+    return x != 0 && (x & (x - 1)) == 0; // non-zero and sharing no set bit with x - 1
+}
+
+void FFT(InputArray _src, OutputArray _dst)
+{
+    CV_Assert(
+        !_src.empty() && 
+        _src.type() == CV_8UC1 && 
+        IS_FASTCV_ALLOCATED(_src.getMat())
+    );
+    // Width and height must be powers of two, the row stride a multiple of 8 bytes, and rows*cols must exceed MIN_REMOTE_BUF_SIZE.
+    CV_Assert(isPow2(_src.rows()) || _src.rows() == 1);
+    CV_Assert(isPow2(_src.cols()));
+    CV_Assert(_src.step() % 8 == 0);
+    CV_Assert(static_cast<unsigned long>(_src.rows() * _src.cols()) > MIN_REMOTE_BUF_SIZE);
+
+    Mat src = _src.getMat();
+    CV_Assert(reinterpret_cast<uintptr_t>(src.data) % 8 == 0);
+    // The result is a two-channel float matrix of the same dimensions as the source.
+    _dst.create(_src.rows(), _src.cols(), CV_32FC2);
+    CV_Assert(_dst.step() % 8 == 0);
+    Mat dst = _dst.getMat();
+
+    // Check if dst is allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(dst));
+    CV_Assert(reinterpret_cast<uintptr_t>(dst.data) % 8 == 0);
+    
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+
+    fcvStatus status = fcvFFTu8Q(src.data, src.cols, src.rows, src.step,
+        (float*)dst.data, dst.step);
+    // Any status other than FASTCV_SUCCESS is raised as StsInternal with the status name when it is known.
+    if (status != FASTCV_SUCCESS)
+    {
+        std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
+        CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
+    }
+}
+
+void IFFT(InputArray _src, OutputArray _dst)
+{
+    CV_Assert(
+        !_src.empty() && 
+        _src.type() == CV_32FC2 &&
+        IS_FASTCV_ALLOCATED(_src.getMat())
+    );
+    // Width and height must be powers of two.
+    CV_Assert(isPow2(_src.rows()) || _src.rows() == 1);
+    CV_Assert(isPow2(_src.cols()));
+    // NOTE(review): the size check below counts one float per element although the source has two channels - confirm this is intended.
+    CV_Assert(_src.step() % 8 == 0);
+    CV_Assert(static_cast<unsigned long>(_src.rows() * _src.cols() * sizeof(float32_t)) > MIN_REMOTE_BUF_SIZE);
+
+    Mat src = _src.getMat();
+
+    CV_Assert(reinterpret_cast<uintptr_t>(src.data) % 8 == 0);
+    // The result is a single-channel 8-bit matrix of the same dimensions as the source.
+    _dst.create(_src.rows(), _src.cols(), CV_8UC1);
+
+    CV_Assert(_dst.step() % 8 == 0);
+
+    Mat dst = _dst.getMat();
+    // Check if dst is allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(dst));
+    CV_Assert(reinterpret_cast<uintptr_t>(dst.data) % 8 == 0);
+
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+    // The width passed to FastCV is src.cols * 2, i.e. the number of floats per row of the two-channel source.
+    fcvStatus status = fcvIFFTf32Q((const float*)src.data, src.cols * 2, src.rows, src.step,
+        dst.data, dst.step);
+    // Any status other than FASTCV_SUCCESS is raised as StsInternal with the status name when it is known.
+    if (status != FASTCV_SUCCESS)
+    {
+        std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
+        CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
+    }
+}
+
+} // dsp::
+} // fastcv::
+} // cv::
\ No newline at end of file
diff --git a/modules/fastcv/src/precomp.hpp b/modules/fastcv/src/precomp.hpp
index c2929d76cc1..c5485eeff1a 100644
--- a/modules/fastcv/src/precomp.hpp
+++ b/modules/fastcv/src/precomp.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -10,11 +10,13 @@
 #include <opencv2/imgproc.hpp>
 #include "opencv2/core/private.hpp"
 #include "opencv2/core/utils/logger.hpp"
-
+#include <opencv2/core/core_c.h>
 #include <opencv2/fastcv.hpp>
 #include <map>
+#include <atomic>
 
 #include "fastcv.h"
+#include "fastcvDsp.h"
 
 namespace cv {
 namespace fastcv {
@@ -30,6 +32,7 @@ namespace fastcv {
 
 #define FCV_KernelSize_SHIFT 3
 #define FCV_MAKETYPE(ksize,depth) ((ksize<<FCV_KernelSize_SHIFT) + depth)
+#define MIN_REMOTE_BUF_SIZE (176*144*sizeof(uint8_t)) // parenthesized so the value survives any surrounding operator
 
 const std::map<fcvStatus, std::string> fcvStatusStrings =
 {
@@ -72,6 +75,111 @@ struct FastCvContext
     bool isInitialized;
 };
 
+namespace dsp {
+    struct FastCvDspContext;
+    // Evaluates to true when `mat` uses the allocator returned by getQcAllocator(); raises StsBadArg otherwise.
+    #define IS_FASTCV_ALLOCATED(mat) \
+    (((mat).allocator == cv::fastcv::getQcAllocator()) ? true : \
+        (CV_Error(cv::Error::StsBadArg, cv::format("Matrix '%s' not allocated with FastCV allocator. " \
+                                    "Please ensure that the matrix is created using " \
+                                    "cv::fastcv::getQcAllocator().", #mat)), false))
+    // Initializes the DSP on demand; do { } while (0) keeps the expansion a single statement (safe before `else`).
+    #define FASTCV_CHECK_DSP_INIT() \
+    do { if (!FastCvDspContext::getContext().isInitialized() && \
+        fcvdspinit() != 0) \
+    { \
+        CV_Error(cv::Error::StsError, "Failed to initialize DSP"); \
+    } } while (0)
+    // Singleton that runs fcvQ6Init()/fcvQ6DeInit() under a mutex and records whether the DSP is currently initialized.
+    struct FastCvDspContext
+    {
+    private:
+        mutable cv::Mutex initMutex;
+        std::atomic<bool> isDspInitialized{false};
+        std::atomic<uint64_t> initializationCount{0};
+        std::atomic<uint64_t> deInitializationCount{0};
+
+        static FastCvDspContext& getInstanceImpl() {
+            static FastCvDspContext context;
+            return context;
+        }
+    public:
+        static FastCvDspContext& getContext() {
+            return getInstanceImpl();
+        }
+
+        FastCvDspContext(const FastCvDspContext&) = delete;
+        FastCvDspContext& operator=(const FastCvDspContext&) = delete;
+
+        bool initialize() {
+            cv::AutoLock lock(initMutex);
+            // Re-check under the lock: returns true without calling fcvQ6Init() when already initialized.
+            if (isDspInitialized.load(std::memory_order_acquire)) {
+                CV_LOG_INFO(NULL, "FastCV DSP already initialized, skipping initialization");
+                return true;
+            }
+
+            CV_LOG_INFO(NULL, "Initializing FastCV DSP");
+
+            if (fcvQ6Init() == 0) {
+                isDspInitialized.store(true, std::memory_order_release);
+                initializationCount++;
+                CV_LOG_DEBUG(NULL, cv::format("FastCV DSP initialized (init count: %llu, deinit count: %llu)",
+                static_cast<unsigned long long>(initializationCount.load()), static_cast<unsigned long long>(deInitializationCount.load())));
+
+                return true;
+            }
+            // A non-zero result from fcvQ6Init() is reported as failure; the flag stays false.
+            CV_LOG_ERROR(NULL, "FastCV DSP initialization failed");
+            return false;
+        }
+
+        bool deinitialize() {
+            cv::AutoLock lock(initMutex);
+            // Returns true without calling fcvQ6DeInit() when the DSP is not initialized.
+            if (!isDspInitialized.load(std::memory_order_acquire)) {
+                CV_LOG_DEBUG(NULL, "FastCV DSP already deinitialized, skipping deinitialization");
+                return true;
+            }
+
+            CV_LOG_INFO(NULL, "Deinitializing FastCV DSP");
+
+            try {
+                fcvQ6DeInit();
+                isDspInitialized.store(false, std::memory_order_release);
+                deInitializationCount++;
+                CV_LOG_DEBUG(NULL, cv::format("FastCV DSP deinitialized (init count: %llu, deinit count: %llu)",
+                    static_cast<unsigned long long>(initializationCount.load()), static_cast<unsigned long long>(deInitializationCount.load())));
+
+                return true;
+            }
+            catch (...) {
+                CV_LOG_ERROR(NULL, "Exception occurred during FastCV DSP deinitialization");
+                return false;
+            }
+        }
+
+        bool isInitialized() const {
+            return isDspInitialized.load(std::memory_order_acquire);
+        }
+
+        uint64_t getDspInitCount() const {
+            return initializationCount.load(std::memory_order_acquire);
+        }
+
+        uint64_t getDspDeInitCount() const {
+            return deInitializationCount.load(std::memory_order_acquire);
+        }
+
+        const cv::Mutex& getInitMutex() const {
+            return initMutex;
+        }
+        // Construction is private: the only instance is the one returned by getContext().
+    private:
+        FastCvDspContext() = default;
+};
+
+} // namespace dsp
 } // namespace fastcv
 } // namespace cv
 
diff --git a/modules/fastcv/src/sad_dsp.cpp b/modules/fastcv/src/sad_dsp.cpp
new file mode 100644
index 00000000000..a58c1383cf6
--- /dev/null
+++ b/modules/fastcv/src/sad_dsp.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst) 
+{
+    cv::Mat patch = _patch.getMat();
+    cv::Mat src = _src.getMat();
+    
+    // Check if matrices are allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(patch));
+    CV_Assert(IS_FASTCV_ALLOCATED(src));
+    // Input contract: non-empty CV_8UC1 source of more than MIN_REMOTE_BUF_SIZE bytes and a non-empty 8x8 CV_8UC1 patch.
+    CV_Assert(!_src.empty() && "src is empty");
+    CV_Assert(_src.type() == CV_8UC1 && "src type is not CV_8UC1");
+    CV_Assert(_src.step() * _src.rows() > MIN_REMOTE_BUF_SIZE && "src buffer size is too small");
+    CV_Assert(!_patch.empty() && "patch is empty");
+    CV_Assert(_patch.type() == CV_8UC1 && "patch type is not CV_8UC1");
+    CV_Assert(_patch.size() == cv::Size(8, 8) && "patch size is not 8x8");
+    // The output has the source size with one 16-bit unsigned value per pixel.
+    cv::Size size = _src.size();
+    _dst.create(size, CV_16UC1);
+    cv::Mat dst = _dst.getMat();
+
+    CV_Assert(((intptr_t)src.data & 0x7) == 0 && "src data is not 8-byte aligned");
+    CV_Assert(((intptr_t)dst.data & 0x7) == 0 && "dst data is not 8-byte aligned");
+    
+    // Check if dst is allocated by the QcAllocator
+    CV_Assert(IS_FASTCV_ALLOCATED(dst));
+
+    // Check DSP initialization status and initialize if needed
+    FASTCV_CHECK_DSP_INIT();
+    // NOTE(review): no status is captured from this call, so a failure inside FastCV would go unreported here.
+    fcvSumOfAbsoluteDiffs8x8u8_v2Q((uint8_t*)patch.data, patch.step, (uint8_t*)src.data, src.cols, src.rows, src.step, (uint16_t*)dst.data, dst.step);
+}
+
+} // dsp::
+} // fastcv::
+} // cv::
diff --git a/modules/fastcv/src/scale.cpp b/modules/fastcv/src/scale.cpp
index 3e1a3a74b8a..0e37e96213f 100644
--- a/modules/fastcv/src/scale.cpp
+++ b/modules/fastcv/src/scale.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -8,55 +8,68 @@
 namespace cv {
 namespace fastcv {
 
-void resizeDownBy2(cv::InputArray _src, cv::OutputArray _dst)
+void resizeDown(cv::InputArray _src, cv::OutputArray _dst, Size dsize, double inv_scale_x, double inv_scale_y)
 {
-    INITIALIZATION_CHECK;
-
-    CV_Assert(!_src.empty() && _src.type() == CV_8UC1);
-
-    Mat src = _src.getMat();
-    CV_Assert((src.cols & 1)==0 && (src.rows & 1)==0);
-
-    int type = _src.type();
-    cv::Size dsize(src.cols / 2, src.rows / 2);
-
-    _dst.create(dsize, type);
-
-    Mat dst = _dst.getMat();
+    fcvStatus status = FASTCV_SUCCESS; // only the by-2 and by-4 branches below overwrite this
+    Size ssize = _src.size();
 
-    fcvStatus status = (fcvStatus)fcvScaleDownBy2u8_v2((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data,
-        src.cols/2);
+    CV_Assert(!_src.empty() );
+    CV_Assert( _src.type() == CV_8UC1 || _src.type() == CV_8UC2 );
 
-    if (status != FASTCV_SUCCESS)
+    if( dsize.empty() ) // no explicit size: derive it from the two scale factors
     {
-        std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
-        CV_Error( cv::Error::StsInternal, "FastCV error: " + s);
+        CV_Assert(inv_scale_x > 0);
+        CV_Assert(inv_scale_y > 0);
+        dsize = Size(saturate_cast<int>(ssize.width*inv_scale_x),
+                     saturate_cast<int>(ssize.height*inv_scale_y));
+        CV_Assert( !dsize.empty() );
+    }
+    else // explicit size given: the scale factors are recomputed from it
+    {
+        inv_scale_x = static_cast<double>(dsize.width) / ssize.width;
+        inv_scale_y = static_cast<double>(dsize.height) / ssize.height;
+        CV_Assert(inv_scale_x > 0);
+        CV_Assert(inv_scale_y > 0);
     }
-}
-
-void resizeDownBy4(cv::InputArray _src, cv::OutputArray _dst)
-{
-    INITIALIZATION_CHECK;
-
-    CV_Assert(!_src.empty() && _src.type() == CV_8UC1);
 
-    Mat src = _src.getMat();
-    CV_Assert((src.cols & 3)==0 && (src.rows & 3)==0);
+    CV_Assert(dsize.width <= ssize.width && dsize.height <= ssize.height); // downscaling only
 
-    int type = _src.type();
-    cv::Size dsize(src.cols / 4, src.rows / 4);
+    CV_Assert(dsize.width * 20 > ssize.width); // each dimension may shrink by a factor of less than 20
+    CV_Assert(dsize.height * 20 > ssize.height);
 
-    _dst.create(dsize, type);
+    INITIALIZATION_CHECK;
 
+    Mat src = _src.getMat();
+    _dst.create(dsize, src.type());
     Mat dst = _dst.getMat();
 
-    fcvStatus status = (fcvStatus)fcvScaleDownBy4u8_v2((const uint8_t*)src.data, src.cols, src.rows, src.step,
-        (uint8_t*)dst.data, src.cols/4);
+    // Alignment checks: both buffers must start on a 16-byte boundary
+    CV_Assert(reinterpret_cast<uintptr_t>(src.data) % 16 == 0);
+    CV_Assert(reinterpret_cast<uintptr_t>(dst.data) % 16 == 0);
+
+    if(src.type() == CV_8UC2) // two-channel input always takes the interleaved MxN path
+    {
+        fcvScaleDownMNInterleaveu8((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data, dst.cols, dst.rows, dst.step);
+    }
+    else if (src.cols/dst.cols == 4 && src.rows/dst.rows == 4 && src.cols % dst.cols == 0 && src.rows % dst.rows == 0)
+    {
+        CV_Assert(src.rows % 4 == 0);
+        status = (fcvStatus)fcvScaleDownBy4u8_v2((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data, dst.step);
+    }
+    else if (src.cols/dst.cols == 2 && src.rows/dst.rows == 2 && src.cols % dst.cols == 0 && src.rows % dst.rows == 0)
+    {
+        CV_Assert(src.rows % 2 == 0);
+        status = (fcvStatus)fcvScaleDownBy2u8_v2((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data, dst.step);
+    }
+    else // NOTE(review): any result of the MxN calls (here and in the CV_8UC2 branch) is not stored in status - confirm whether they report errors
+    {
+        fcvScaleDownMNu8((const uint8_t*)src.data, src.cols, src.rows, src.step, (uint8_t*)dst.data, dst.cols, dst.rows, dst.step);
+    }
 
     if (status != FASTCV_SUCCESS)
     {
         std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
-        CV_Error( cv::Error::StsInternal, "FastCV error: " + s);
+        CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
     }
 }
 
diff --git a/modules/fastcv/src/thresh_dsp.cpp b/modules/fastcv/src/thresh_dsp.cpp
new file mode 100644
index 00000000000..9c74e619d37
--- /dev/null
+++ b/modules/fastcv/src/thresh_dsp.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "precomp.hpp"
+
+namespace cv {
+namespace fastcv {
+namespace dsp {
+
+    void thresholdOtsu(InputArray _src, OutputArray _dst, bool type)
+    {
+        CV_Assert(
+            !_src.empty() && 
+            _src.type() == CV_8UC1 && 
+            IS_FASTCV_ALLOCATED(_src.getMat())
+        );
+        // The source must exceed MIN_REMOTE_BUF_SIZE bytes; its width and row stride must be multiples of 8.
+        CV_Assert((_src.step() * _src.rows()) > MIN_REMOTE_BUF_SIZE);
+        CV_Assert(_src.cols() % 8 == 0);
+        CV_Assert(_src.step() % 8 == 0);
+
+        Mat src = _src.getMat();
+        CV_Assert(((uintptr_t)src.data & 0x7) == 0);
+        // The destination is CV_8UC1 with the source size and is held to the same stride/width/alignment rules.
+        _dst.create(_src.size(), CV_8UC1);
+        CV_Assert(_dst.step() % 8 == 0);
+        CV_Assert(_dst.cols() % 8 == 0);
+        Mat dst = _dst.getMat();
+
+        // Check if dst is allocated by the QcAllocator
+        CV_Assert(IS_FASTCV_ALLOCATED(dst));
+        CV_Assert(((uintptr_t)dst.data & 0x7) == 0);
+        // When src and dst share the same data pointer (in-place use), both strides must match.
+        if (src.data == dst.data) {
+            CV_Assert(src.step == dst.step);
+        }
+
+        // Check DSP initialization status and initialize if needed
+        FASTCV_CHECK_DSP_INIT();
+
+        fcvThreshType threshType;
+        // type == true selects the inverted binary threshold, false the plain binary threshold.
+        if (type)
+            threshType = FCV_THRESH_BINARY_INV;
+        else
+            threshType = FCV_THRESH_BINARY;
+
+        fcvFilterThresholdOtsuu8Q(src.data, src.cols, src.rows, src.step, dst.data, dst.step, threshType);
+    }
+
+} // dsp::
+} // fastcv::
+} // cv::
\ No newline at end of file
diff --git a/modules/fastcv/src/warp.cpp b/modules/fastcv/src/warp.cpp
index 01f83bdf510..28e312e26f9 100644
--- a/modules/fastcv/src/warp.cpp
+++ b/modules/fastcv/src/warp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -12,49 +12,52 @@ class FcvWarpPerspectiveLoop_Invoker : public cv::ParallelLoopBody
 {
     public:
 
-    FcvWarpPerspectiveLoop_Invoker(InputArray _src1, InputArray _src2, OutputArray _dst1, OutputArray _dst2, InputArray _M0,
-        Size _dsize) : cv::ParallelLoopBody()
-    {
-        src1 = _src1.getMat();
-        src2 = _src2.getMat();
-        dsize = _dsize;
-
-        _dst1.create(dsize, src1.type());
-        _dst2.create(dsize, src2.type());
-        dst1 = _dst1.getMat();
-        dst2 = _dst2.getMat();
-
-        M = _M0.getMat();
-    }
+    FcvWarpPerspectiveLoop_Invoker(const Mat& _src1, const Mat& _src2, Mat& _dst1, Mat& _dst2, // images are bound by reference, not copied
+        const float * _M, fcvInterpolationType _interpolation = FASTCV_INTERPOLATION_TYPE_NEAREST_NEIGHBOR, // _M is read as M[0]..M[8] by operator()
+        fcvBorderType _borderType = fcvBorderType::FASTCV_BORDER_UNDEFINED, const int _borderValue = 0)
+        : ParallelLoopBody(), src1(_src1), src2(_src2), dst1(_dst1), dst2(_dst2), M(_M), interpolation(_interpolation),
+        borderType(_borderType), borderValue(_borderValue)
+    {} // only references and a raw pointer are stored, so the caller's Mats and matrix array must outlive this object
 
     virtual void operator()(const cv::Range& range) const CV_OVERRIDE
     {
-        uchar* dst1_ptr = dst1.data + range.start*dst1.step;
-        uchar* dst2_ptr = dst2.data + range.start*dst2.step;
+        uchar* dst1_ptr = dst1.data + range.start * dst1.step; // first destination row of this stripe
         int rangeHeight = range.end - range.start;
 
         float rangeMatrix[9];
-        rangeMatrix[0] = M.at<float>(0,0);
-        rangeMatrix[1] = M.at<float>(0,1);
-        rangeMatrix[2] = M.at<float>(0,2)+range.start*M.at<float>(0,1);
-        rangeMatrix[3] = M.at<float>(1,0);
-        rangeMatrix[4] = M.at<float>(1,1);
-        rangeMatrix[5] = M.at<float>(1,2)+range.start*M.at<float>(1,1);
-        rangeMatrix[6] = M.at<float>(2,0);
-        rangeMatrix[7] = M.at<float>(2,1);
-        rangeMatrix[8] = M.at<float>(2,2)+range.start*M.at<float>(2,1);
-
-        fcv2PlaneWarpPerspectiveu8(src1.data, src2.data, src1.cols, src1.rows, src1.step, src2.step, dst1_ptr, dst2_ptr,
-            dsize.width, rangeHeight, dst1.step, dst2.step, rangeMatrix);
+        rangeMatrix[0] = M[0];
+        rangeMatrix[1] = M[1];
+        rangeMatrix[2] = M[2]+range.start*M[1]; // third column absorbs range.start times the second column, since the callee is handed a pointer already offset to this stripe
+        rangeMatrix[3] = M[3];
+        rangeMatrix[4] = M[4];
+        rangeMatrix[5] = M[5]+range.start*M[4];
+        rangeMatrix[6] = M[6];
+        rangeMatrix[7] = M[7];
+        rangeMatrix[8] = M[8]+range.start*M[7];
+        // An empty second source or destination selects the single-image v5 warp; otherwise both planes are warped in one call.
+        if ((src2.empty()) || (dst2.empty()))
+        {
+            fcvWarpPerspectiveu8_v5(src1.data, src1.cols, src1.rows, src1.step, src1.channels(), dst1_ptr, dst1.cols, rangeHeight,
+                dst1.step, rangeMatrix, interpolation, borderType, borderValue);
+        }
+        else
+        {
+            uchar* dst2_ptr = dst2.data + range.start * dst2.step; // second plane uses its own stride for the stripe offset
+            fcv2PlaneWarpPerspectiveu8(src1.data, src2.data, src1.cols, src1.rows, src1.step, src2.step, dst1_ptr, dst2_ptr,
+                dst1.cols, rangeHeight, dst1.step, dst2.step, rangeMatrix);
+        }
     }
 
     private:
-    Mat         src1;
-    Mat         src2;
-    Mat         dst1;
-    Mat         dst2;
-    Mat         M;
-    Size        dsize;
+
+    const Mat&              src1;
+    const Mat&              src2;
+    Mat&                    dst1;
+    Mat&                    dst2;
+    const float*            M;
+    fcvInterpolationType    interpolation;
+    fcvBorderType           borderType;
+    int                     borderValue;
 
     FcvWarpPerspectiveLoop_Invoker(const FcvWarpPerspectiveLoop_Invoker &);  // = delete;
     const FcvWarpPerspectiveLoop_Invoker& operator= (const FcvWarpPerspectiveLoop_Invoker &);  // = delete;
@@ -68,8 +71,249 @@ void warpPerspective2Plane(InputArray _src1, InputArray _src2, OutputArray _dst1
     CV_Assert(!_src2.empty() && _src2.type() == CV_8UC1);
     CV_Assert(!_M0.empty());
 
+    Mat src1 = _src1.getMat();
+    Mat src2 = _src2.getMat();
+
+    _dst1.create(dsize, src1.type());
+    _dst2.create(dsize, src2.type());
+    Mat dst1 = _dst1.getMat();
+    Mat dst2 = _dst2.getMat();
+
+    Mat M0 = _M0.getMat();
+    CV_Assert((M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3);
+    float matrix[9];
+    Mat M(3, 3, CV_32F, matrix);
+    M0.convertTo(M, M.type());
+
+    int nThreads = getNumThreads();
+    int nStripes = nThreads > 1 ? 2*nThreads : 1;
+
+    cv::parallel_for_(cv::Range(0, dsize.height),
+        FcvWarpPerspectiveLoop_Invoker(src1, src2, dst1, dst2, matrix), nStripes);
+}
+// Perspective-warps an 8-bit image via fcvWarpPerspectiveu8_v5, striped over threads; _M0 is a 3x3 CV_32F/CV_64F matrix, in-place is rejected.
+void warpPerspective(InputArray _src, OutputArray _dst, InputArray _M0, Size dsize, int interpolation, int borderType,
+    const Scalar&  borderValue)
+{
+    Mat src = _src.getMat();
+    CV_Assert(!src.empty()); // an empty source would hand a null data pointer to FastCV
+    _dst.create(dsize, src.type());
+    Mat dst = _dst.getMat();
+
+    Mat M0 = _M0.getMat();
+    CV_Assert((M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3);
+    float matrix[9];
+    Mat M(3, 3, CV_32F, matrix); // header over the local array, so convertTo fills 'matrix' directly
+    M0.convertTo(M, M.type());
+
+    // Do not support inplace case
+    CV_Assert(src.data != dst.data);
+    // Only support CV_8U
+    CV_Assert(src.depth() == CV_8U);
+
+    INITIALIZATION_CHECK;
+
+    fcvBorderType           fcvBorder;
+    uint8_t                 fcvBorderValue = 0;
+    fcvInterpolationType    fcvInterpolation;
+
+    switch (borderType)
+    {
+        case BORDER_CONSTANT:
+        {
+            // One value serves every channel, so only the components the image actually uses must agree (Scalar(v) stays valid for 1 channel)
+            const int usedChannels = src.channels() < 4 ? src.channels() : 4;
+            for (int c = 1; c < usedChannels; c++)
+                CV_Assert(borderValue[0] == borderValue[c]);
+
+            fcvBorder       = fcvBorderType::FASTCV_BORDER_CONSTANT;
+            fcvBorderValue  = cv::saturate_cast<uint8_t>(borderValue[0]); // clamp to [0, 255] instead of an out-of-range double -> uint8_t cast
+            break;
+        }
+        case BORDER_REPLICATE:
+        {
+            fcvBorder = fcvBorderType::FASTCV_BORDER_REPLICATE;
+            break;
+        }
+        case BORDER_TRANSPARENT:
+        {
+            fcvBorder = fcvBorderType::FASTCV_BORDER_UNDEFINED;
+            break;
+        }
+        default:
+            CV_Error(cv::Error::StsBadArg, cv::format("Border type:%d is not supported", borderType));
+    }
+
+    switch(interpolation)
+    {
+        case INTER_NEAREST:
+        {
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_NEAREST_NEIGHBOR;
+            break;
+        }
+        case INTER_LINEAR:
+        {
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_BILINEAR;
+            break;
+        }
+        case INTER_AREA:
+        {
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_AREA;
+            break;
+        }
+        default:
+            CV_Error(cv::Error::StsBadArg, cv::format("Interpolation type:%d is not supported", interpolation));
+    }
+
+    int nThreads = cv::getNumThreads();
+    int nStripes = nThreads > 1 ? 2*nThreads : 1;
+
+    // empty placeholder for the unused second plane; an empty src2/dst2 makes the invoker take its single-image branch
+    Mat tmp;
+
     cv::parallel_for_(cv::Range(0, dsize.height),
-        FcvWarpPerspectiveLoop_Invoker(_src1, _src2, _dst1, _dst2, _M0, dsize), 1);
+        FcvWarpPerspectiveLoop_Invoker(src, tmp, dst, tmp, matrix, fcvInterpolation, fcvBorder, fcvBorderValue), nStripes);
+}
+// Affine warp through FastCV: a 2x2 _M extracts a CV_8UC1 patch around the ROI/image center, a 2x3 _M warps CV_8UC1 or CV_8UC3.
+void warpAffine(InputArray _src, OutputArray _dst, InputArray _M, Size dsize,
+                int interpolation, int borderValue)
+{
+    INITIALIZATION_CHECK;
+    CV_Assert(!_src.empty());
+    CV_Assert(!_M.empty());
+
+    Mat src = _src.getMat();
+    Mat M;
+    _M.getMat().convertTo(M, CV_32F); // every M.at<float>() below needs CV_32F storage; CV_64F input is converted instead of being misread
+    CV_CheckEQ(M.rows, 2, "Affine Matrix must have 2 rows");
+    CV_Check(M.cols, M.cols == 2 || M.cols == 3, "Affine Matrix must be 2x2 or 2x3");
+
+    if (M.rows == 2 && M.cols == 2)
+    {
+        CV_CheckTypeEQ(src.type(), CV_8UC1, "2x2 matrix transformation only supports CV_8UC1");
+
+        // Check if src is a ROI
+        Size wholeSize;
+        Point ofs;
+        src.locateROI(wholeSize, ofs);
+        bool isROI = (wholeSize.width > src.cols || wholeSize.height > src.rows);
+
+        Mat fullImage;
+        Point2f center;
+
+        if (isROI)
+        {
+            center.x = ofs.x + src.cols / 2.0f;
+            center.y = ofs.y + src.rows / 2.0f;
+
+            CV_Check(center.x, center.x >= 0 && center.x < wholeSize.width, "ROI center X is outside full image bounds");
+            CV_Check(center.y, center.y >= 0 && center.y < wholeSize.height, "ROI center Y is outside full image bounds");
+
+            size_t offset = ofs.y * src.step + ofs.x * src.elemSize();
+            fullImage = Mat(wholeSize, src.type(), src.data - offset, src.step); // keep the parent's row stride; it may exceed width * elemSize
+        }
+        else
+        {
+            // Use src as is, center at image center
+            fullImage = src;
+            center.x = src.cols / 2.0f;
+            center.y = src.rows / 2.0f;
+
+            CV_LOG_WARNING(NULL, "2x2 matrix with non-ROI input. Using image center for patch extraction.");
+        }
+
+        float affineMatrix[4] = {
+            M.at<float>(0, 0), M.at<float>(0, 1),
+            M.at<float>(1, 0), M.at<float>(1, 1)};
+
+        float position[2] = {center.x, center.y};
+
+        _dst.create(dsize, src.type());
+        Mat dst = _dst.getMat();
+        // dst.step is passed unchanged so a pre-allocated, non-continuous destination is written with its real stride
+
+        int status = fcvTransformAffineu8_v2(
+            (const uint8_t *)fullImage.data,
+            fullImage.cols, fullImage.rows, fullImage.step,
+            position,
+            affineMatrix,
+            (uint8_t *)dst.data,
+            dst.cols, dst.rows, dst.step);
+
+        if (status != 0)
+        {
+            CV_Error(Error::StsInternal, "FastCV patch extraction failed");
+        }
+
+        return;
+    }
+
+    // Validate 2x3 matrix for standard transformation
+    CV_CheckEQ(M.cols, 3, "Matrix must be 2x3 for standard affine transformation");
+    CV_Check(src.type(), src.type() == CV_8UC1 || src.type() == CV_8UC3, "Standard transformation supports CV_8UC1 or CV_8UC3");
+
+    float32_t affineMatrix[6] = {
+        M.at<float>(0, 0), M.at<float>(0, 1), M.at<float>(0, 2),
+        M.at<float>(1, 0), M.at<float>(1, 1), M.at<float>(1, 2)};
+
+    _dst.create(dsize, src.type());
+    Mat dst = _dst.getMat();
+
+    if (src.channels() == 1)
+    {
+        fcvStatus status;
+        fcvInterpolationType fcvInterpolation;
+
+        switch (interpolation)
+        {
+        case cv::InterpolationFlags::INTER_NEAREST:
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_NEAREST_NEIGHBOR;
+            break;
+        case cv::InterpolationFlags::INTER_LINEAR:
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_BILINEAR;
+            break;
+        case cv::InterpolationFlags::INTER_AREA:
+            fcvInterpolation = FASTCV_INTERPOLATION_TYPE_AREA;
+            break;
+        default:
+            CV_Error(cv::Error::StsBadArg, "Unsupported interpolation type");
+        }
+
+        status = fcvTransformAffineClippedu8_v3(
+            (const uint8_t *)src.data, src.cols, src.rows, src.step,
+            affineMatrix,
+            (uint8_t *)dst.data, dst.cols, dst.rows, dst.step,
+            NULL,
+            fcvInterpolation,
+            FASTCV_BORDER_CONSTANT,
+            borderValue);
+
+        if (status != FASTCV_SUCCESS)
+        {
+            std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown";
+            CV_Error(cv::Error::StsInternal, "FastCV error: " + s);
+        }
+    }
+    else if (src.channels() == 3)
+    {
+        CV_LOG_INFO(NULL, "warpAffine: 3-channel images use bicubic interpolation internally."); // 'interpolation' and 'borderValue' are not forwarded on this path
+
+        std::vector<uint32_t> dstBorder; // scratch array of two entries per destination row, required by the FastCV call below
+        try
+        {
+            dstBorder.resize(dsize.height * 2);
+        }
+        catch (const std::bad_alloc &)
+        {
+            CV_Error(Error::StsNoMem, "Failed to allocate border array");
+        }
+
+        fcv3ChannelTransformAffineClippedBCu8(
+            (const uint8_t *)src.data, src.cols, src.rows, src.step[0],
+            affineMatrix,
+            (uint8_t *)dst.data, dst.cols, dst.rows, dst.step[0],
+            dstBorder.data());
+    }
+}
 
 } // fastcv::
diff --git a/modules/fastcv/test/test_blur_dsp.cpp b/modules/fastcv/test/test_blur_dsp.cpp
new file mode 100644
index 00000000000..2be6dd3475a
--- /dev/null
+++ b/modules/fastcv/test/test_blur_dsp.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+typedef testing::TestWithParam<tuple<Size, int, int>> Filter2DTest_DSP;
+
+TEST_P(Filter2DTest_DSP, accuracy) // compares cv::fastcv::dsp::filter2D with cv::filter2D for each (image size, dst depth, kernel size) tuple
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    //Initialize DSP
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    Size srcSize = get<0>(GetParam());
+    int ddepth   = get<1>(GetParam());
+    int ksize    = get<2>(GetParam());
+
+    cv::Mat src;
+    src.allocator = cv::fastcv::getQcAllocator(); // buffers handed to the DSP call are created through the FastCV allocator
+    src.create(srcSize, CV_8U);
+
+    cv::Mat kernel;
+    cv::Mat dst, ref;
+    kernel.allocator = cv::fastcv::getQcAllocator();
+    dst.allocator = cv::fastcv::getQcAllocator();
+
+    switch (ddepth) // the kernel element type follows the requested destination depth
+    {
+        case CV_8U:
+        case CV_16S:
+        {
+            kernel.create(ksize,ksize,CV_8S);
+            break;
+        }
+        case CV_32F:
+        {
+            kernel.create(ksize,ksize,CV_32F);
+            break;
+        }
+        default:
+            return; // NOTE(review): returns without fcvdspdeinit(); not hit by the depths instantiated below
+    }
+
+    RNG& rng = cv::theRNG();
+    cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(255));
+    cvtest::randUni(rng, kernel, Scalar::all(INT8_MIN), Scalar::all(INT8_MAX));
+
+    cv::fastcv::dsp::filter2D(src, dst, ddepth, kernel);
+
+    //De-Initialize DSP
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    cv::filter2D(src, ref, ddepth, kernel); // reference result from the regular OpenCV implementation
+    cv::Mat difference;
+    dst.convertTo(dst, CV_8U); // both results are brought to CV_8U before differencing
+    ref.convertTo(ref, CV_8U);
+    cv::absdiff(dst, ref, difference);
+
+    int num_diff_pixels = cv::countNonZero(difference);
+    EXPECT_LT(num_diff_pixels, (src.rows+src.cols)*ksize); // tolerates a bounded number of differing pixels rather than requiring equality
+}
+
+INSTANTIATE_TEST_CASE_P(FastCV_Extension, Filter2DTest_DSP, Combine(
+/*image size*/      Values(perf::szVGA, perf::sz720p),
+/*dst depth*/      Values(CV_8U,CV_16S,CV_32F),
+/*kernel size*/    Values(3, 5, 7, 9, 11)
+));
+
+}} // namespaces opencv_test, ::
diff --git a/modules/fastcv/test/test_edges_dsp.cpp b/modules/fastcv/test/test_edges_dsp.cpp
new file mode 100644
index 00000000000..7bf41d3ba7f
--- /dev/null
+++ b/modules/fastcv/test/test_edges_dsp.cpp
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+TEST(DSP_CannyTest, accuracy) // runs cv::fastcv::dsp::Canny on a grayscale image; only output presence and size are verified
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    //Initialize DSP
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    cv::Mat src;
+    src.allocator = cv::fastcv::getQcAllocator(); // set before imread so the decoded image is created through the FastCV allocator
+    cv::imread(cvtest::findDataFile("cv/detectors_descriptors_evaluation/planar/box_in_scene.png"), src, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty()) << "Could not read the image file.";
+
+    cv::Mat dst;
+    dst.allocator = cv::fastcv::getQcAllocator();
+
+    int lowThreshold = 0;
+    int highThreshold = 150;
+
+    cv::fastcv::dsp::Canny(src, dst, lowThreshold, highThreshold, 3, true); // NOTE(review): 3 / true presumably are aperture size and L2 gradient - confirm against edges_dsp.hpp
+
+    //De-Initialize DSP
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    EXPECT_FALSE(dst.empty()); // no pixel-level comparison against a reference is made
+    EXPECT_EQ(src.size(), dst.size());
+}
+
+}
+}
diff --git a/modules/fastcv/test/test_fft_dsp.cpp b/modules/fastcv/test/test_fft_dsp.cpp
new file mode 100644
index 00000000000..49c20c4bfeb
--- /dev/null
+++ b/modules/fastcv/test/test_fft_dsp.cpp
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+class FFT_DSPExtTest : public ::testing::TestWithParam<cv::Size> {};
+
+TEST_P(FFT_DSPExtTest, forward) // compares cv::fastcv::dsp::FFT of a random 8-bit image with cv::dft of its float copy
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    //Initialize DSP
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    Size size = GetParam();
+
+    RNG& rng = cv::theRNG();
+
+    Mat src;
+    src.allocator = cv::fastcv::getQcAllocator(); // source and destination of the DSP call use the FastCV allocator
+    src.create(size, CV_8UC1);
+
+    cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(256));
+
+    Mat srcFloat; // float copy feeds the cv::dft reference
+    src.convertTo(srcFloat, CV_32F);
+
+    Mat dst, ref;
+    dst.allocator = cv::fastcv::getQcAllocator();
+    cv::fastcv::dsp::FFT(src, dst);
+
+    //De-Initialize DSP
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    cv::dft(srcFloat, ref, DFT_COMPLEX_OUTPUT);
+
+    double normInf = cvtest::norm(dst, ref, cv::NORM_INF);
+    double normL2  = cvtest::norm(dst, ref, cv::NORM_L2)  / dst.size().area(); // L2 error is normalised by the pixel count
+
+    EXPECT_LT(normInf, 19.1); // for 512x512 case
+    EXPECT_LT(normL2, 18.0 / 256.0 );
+}
+
+TEST_P(FFT_DSPExtTest, inverse) // round-trips FFT -> IFFT on the DSP and compares with the scaled cv::dft -> cv::idft round trip
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    //Initialize DSP
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    Size size = GetParam();
+
+    RNG& rng = cv::theRNG();
+
+    Mat src;
+    src.allocator = cv::fastcv::getQcAllocator(); // every Mat passed to the DSP calls uses the FastCV allocator
+    src.create(size, CV_8UC1);
+
+    cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(256));
+
+    Mat srcFloat; // float copy feeds the OpenCV reference path
+    src.convertTo(srcFloat, CV_32F);
+
+    Mat fwd, back;
+    fwd.allocator = cv::fastcv::getQcAllocator();
+    back.allocator = cv::fastcv::getQcAllocator();
+
+    cv::fastcv::dsp::FFT(src, fwd);
+    cv::fastcv::dsp::IFFT(fwd, back);
+
+    //De-Initialize DSP
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    Mat backFloat;
+    back.convertTo(backFloat, CV_32F);
+
+    Mat fwdRef, backRef;
+    cv::dft(srcFloat, fwdRef, DFT_COMPLEX_OUTPUT);
+    cv::idft(fwdRef, backRef, DFT_REAL_OUTPUT);
+
+    backRef *= 1./(src.size().area()); // idft is called without a scaling flag, so the reference is divided by the element count here
+
+    double normInf = cvtest::norm(backFloat, backRef, cv::NORM_INF);
+    double normL2  = cvtest::norm(backFloat, backRef, cv::NORM_L2)  / src.size().area(); // L2 error is normalised by the pixel count
+
+    EXPECT_LT(normInf, 9.16e-05);
+    EXPECT_LT(normL2,  1.228e-06);
+}
+
+INSTANTIATE_TEST_CASE_P(FastCV_Extension, FFT_DSPExtTest, ::testing::Values(Size(256, 256), Size(512, 512)));
+
+}} // namespaces opencv_test, ::
diff --git a/modules/fastcv/test/test_main.cpp b/modules/fastcv/test/test_main.cpp
index cc60576e96f..fe8a3c6c515 100644
--- a/modules/fastcv/test/test_main.cpp
+++ b/modules/fastcv/test/test_main.cpp
@@ -5,4 +5,9 @@
 
 #include "test_precomp.hpp"
 
-CV_TEST_MAIN("")
+static void initFastCVTests() // start-up hook: registers the FastCV DSP tag through cvtest::registerGlobalSkipTag
+{
+    cvtest::registerGlobalSkipTag(CV_TEST_TAG_FASTCV_SKIP_DSP); // presumably makes tests carrying this tag skipped by default - confirm in the ts module
+}
+
+CV_TEST_MAIN("", initFastCVTests()) // the registration call is supplied as the extra argument of the test main macro
diff --git a/modules/fastcv/test/test_precomp.hpp b/modules/fastcv/test/test_precomp.hpp
index 7ff8ed78049..5c172e71c54 100644
--- a/modules/fastcv/test/test_precomp.hpp
+++ b/modules/fastcv/test/test_precomp.hpp
@@ -9,3 +9,5 @@
 #include <opencv2/video.hpp>
 
 #include <opencv2/fastcv.hpp>
+
+#define CV_TEST_TAG_FASTCV_SKIP_DSP "fastcv_skip_dsp" // tag applied by the DSP tests via applyTestTag and registered as a global skip tag in test_main.cpp
diff --git a/modules/fastcv/test/test_sad_dsp.cpp b/modules/fastcv/test/test_sad_dsp.cpp
new file mode 100644
index 00000000000..5c160e75028
--- /dev/null
+++ b/modules/fastcv/test/test_sad_dsp.cpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+using namespace cv::fastcv::dsp;
+
+namespace opencv_test { namespace {
+
+TEST(SadTest, accuracy) // runs cv::fastcv::dsp::sumOfAbsoluteDiffs with an all-zero 8x8 patch on an all-255 512x512 image; only checks dst is produced
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    //Initialize DSP
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    // Create an 8x8 template patch
+    cv::Mat patch;
+    patch.allocator = cv::fastcv::getQcAllocator(); // patch, source and destination all use the FastCV allocator
+    patch.create(8, 8, CV_8UC1);
+    patch.setTo(cv::Scalar(0));
+
+    // Create a source image
+    cv::Mat src;
+    src.allocator = cv::fastcv::getQcAllocator();
+    src.create(512, 512, CV_8UC1);
+    src.setTo(cv::Scalar(255));
+
+    cv::Mat dst;
+    dst.allocator = cv::fastcv::getQcAllocator();
+
+    cv::fastcv::dsp::sumOfAbsoluteDiffs(patch, src, dst);
+
+    EXPECT_FALSE(dst.empty()); // the computed values themselves are not compared with a reference
+
+    //De-Initialize DSP
+    cv::fastcv::dsp::fcvdspdeinit();
+}
+
+}
+}
diff --git a/modules/fastcv/test/test_scale.cpp b/modules/fastcv/test/test_scale.cpp
index b8e84218ed8..46ffa7d32f8 100644
--- a/modules/fastcv/test/test_scale.cpp
+++ b/modules/fastcv/test/test_scale.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -7,27 +7,21 @@
 
 namespace opencv_test { namespace {
 
-class ResizeBy2Test : public ::testing::TestWithParam<cv::Size> {};
-class ResizeBy4Test : public ::testing::TestWithParam<cv::Size> {};
-
 TEST(resizeDownBy2, accuracy)
 {
     cv::Mat inputImage = cv::imread(cvtest::findDataFile("cv/shared/box_in_scene.png"), cv::IMREAD_GRAYSCALE);
 
-    Size dsize;
     cv::Mat resized_image;
 
-    cv::fastcv::resizeDownBy2(inputImage, resized_image);
+    cv::fastcv::resizeDown(inputImage, resized_image, cv::Size(inputImage.cols / 2, inputImage.rows / 2), 0, 0); // explicit half-size target with zero scale factors, replacing the resizeDownBy2 call
 
     EXPECT_FALSE(resized_image.empty());
 
     cv::Mat resizedImageOpenCV;
     cv::resize(inputImage, resizedImageOpenCV, cv::Size(inputImage.cols / 2, inputImage.rows / 2), 0, 0, INTER_AREA);
 
-    // Calculate the maximum difference
     double maxVal = cv::norm(resized_image, resizedImageOpenCV, cv::NORM_INF);
 
-    // Assert if the difference is acceptable (max difference should be less than 10)
     CV_Assert(maxVal < 10 && "Difference between images is too high!");
 }
 
@@ -38,67 +32,56 @@ TEST(resizeDownBy4, accuracy)
     Size dsize;
     cv::Mat resized_image;
 
-    cv::fastcv::resizeDownBy4(inputImage, resized_image);
+    cv::fastcv::resizeDown(inputImage, resized_image, dsize, 0.25, 0.25);
 
     EXPECT_FALSE(resized_image.empty());
 
     cv::Mat resizedImageOpenCV;
     cv::resize(inputImage, resizedImageOpenCV, cv::Size(inputImage.cols / 4, inputImage.rows / 4), 0, 0, INTER_AREA);
 
-    // Calculate the maximum difference
     double maxVal = cv::norm(resized_image, resizedImageOpenCV, cv::NORM_INF);
 
-    // Assert if the difference is acceptable (max difference should be less than 10)
     CV_Assert(maxVal < 10 && "Difference between images is too high!");
 }
 
-TEST_P(ResizeBy2Test, ResizeBy2) {
-
-    //Size size = get<0>(GetParam());
-    Size size = GetParam();
-    cv::Mat inputImage(size, CV_8UC1);
-    randu(inputImage, Scalar::all(0), Scalar::all(255)); // Fill with random values
+TEST(resizeDownMN, accuracy) // resizes a grayscale image to a fixed 800x640 target and compares with cv::resize using INTER_LINEAR
+{
+    cv::Mat inputImage = cv::imread(cvtest::findDataFile("cv/cascadeandhog/images/class57.png"), cv::IMREAD_GRAYSCALE);
 
-    Size dsize;
     cv::Mat resized_image;
 
-    // Resize the image by a factor of 2
-    cv::fastcv::resizeDownBy2(inputImage, resized_image);
+    cv::fastcv::resizeDown(inputImage, resized_image, cv::Size(800, 640), 0, 0); // explicit target size, both scale factors left at 0
 
-    // Check if the output size is correct
-    EXPECT_EQ(resized_image.size().width, size.width * 0.5);
-    EXPECT_EQ(resized_image.size().height, size.height * 0.5);
+    EXPECT_FALSE(resized_image.empty());
+
+    cv::Mat resizedImageOpenCV;
+    cv::resize(inputImage, resizedImageOpenCV, cv::Size(800, 640), 0, 0, INTER_LINEAR);
+
+    double maxVal = cv::norm(resized_image, resizedImageOpenCV, cv::NORM_INF); // largest per-pixel absolute difference
+
+    CV_Assert(maxVal < 78 && "Difference between images is too high!"); // a failure throws from CV_Assert instead of recording a gtest expectation
 }
 
-TEST_P(ResizeBy4Test, ResizeBy4) {
+TEST(resizeDownInterleaved, accuracy) // resizes a random 2-channel interleaved image by (0.5, 0.125) and compares with cv::resize using INTER_AREA
+{
+    cv::Mat inputImage = cv::Mat::zeros(512, 512, CV_8UC2);
+    cv::randu(inputImage, cv::Scalar::all(0), cv::Scalar::all(255)); // Scalar::all so the second channel is randomized too; Scalar(255) leaves its upper bound at 0
 
-    //Size size = get<0>(GetParam());
-    Size size = GetParam();
-    cv::Mat inputImage(size, CV_8UC1);
-    randu(inputImage, Scalar::all(0), Scalar::all(255)); // Fill with random values
 
     Size dsize;
     cv::Mat resized_image;
 
-    // Resize the image by a factor of 4
-    cv::fastcv::resizeDownBy4(inputImage, resized_image);
+    cv::fastcv::resizeDown(inputImage, resized_image, dsize, 0.500, 0.125); // empty dsize: the output size comes from the two scale factors
 
-    // Check if the output size is correct
-    EXPECT_EQ(resized_image.size().width, size.width * 0.25);
-    EXPECT_EQ(resized_image.size().height, size.height * 0.25);
-}
+    EXPECT_FALSE(resized_image.empty());
 
-INSTANTIATE_TEST_CASE_P(
-    ResizeTests,
-    ResizeBy2Test,
-    ::testing::Values(cv::Size(640, 480), cv::Size(1280, 720), cv::Size(1920, 1080)
-));
 
-INSTANTIATE_TEST_CASE_P(
-    ResizeTests,
-    ResizeBy4Test,
-    ::testing::Values(cv::Size(640, 480), cv::Size(1280, 720), cv::Size(1920, 1080)
-));
+    cv::Mat resizedImageOpenCV;
+    cv::resize(inputImage, resizedImageOpenCV, dsize, 0.500, 0.125, INTER_AREA);
 
+    double maxVal = cv::norm(resized_image, resizedImageOpenCV, cv::NORM_INF);
+
+    CV_Assert(maxVal < 10 && "Difference between images is too high!");
+}
 
 }} // namespaces opencv_test, ::
\ No newline at end of file
diff --git a/modules/fastcv/test/test_thresh_dsp.cpp b/modules/fastcv/test/test_thresh_dsp.cpp
new file mode 100644
index 00000000000..a475d928761
--- /dev/null
+++ b/modules/fastcv/test/test_thresh_dsp.cpp
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+*/
+
+#include "test_precomp.hpp"
+
+namespace opencv_test { namespace {
+
+TEST(ThresholdOtsuTest, accuracy) // compares cv::fastcv::dsp::thresholdOtsu (separate dst) with cv::threshold using THRESH_BINARY | THRESH_OTSU
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    //Initialize DSP
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    cv::Mat src;
+    src.allocator = cv::fastcv::getQcAllocator(); // set before imread so the decoded image is created through the FastCV allocator
+    cv::imread(cvtest::findDataFile("cv/detectors_descriptors_evaluation/planar/box_in_scene.png"), src, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty()) << "Could not read the image file.";
+
+    cv::Mat dst;
+    dst.allocator = cv::fastcv::getQcAllocator();
+
+    bool type = 0; // false is passed as the 'type' flag of thresholdOtsu; the reference below uses THRESH_BINARY
+
+    cv::fastcv::dsp::thresholdOtsu(src, dst, type);
+
+    // De-Initialize DSP
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    EXPECT_FALSE(dst.empty());
+    EXPECT_EQ(src.size(), dst.size());
+
+    // Compare the result against the reference cv::threshold function with Otsu's method
+    cv::Mat referenceDst;
+    cv::threshold(src, referenceDst, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);
+
+    double maxDifference = 10.0; // largest per-pixel deviation accepted
+    cv::Mat diff;
+    cv::absdiff(dst, referenceDst, diff);
+    double maxVal;
+    cv::minMaxLoc(diff, nullptr, &maxVal);
+
+    EXPECT_LE(maxVal, maxDifference) << "The custom threshold result differs from the reference result by more than the acceptable threshold.";
+}
+
+TEST(ThresholdOtsuTest, inPlaceAccuracy) // same comparison as the accuracy test, but the DSP call receives one Mat as both input and output
+{
+    applyTestTag(CV_TEST_TAG_FASTCV_SKIP_DSP);
+
+    // Initialize DSP
+    int initStatus = cv::fastcv::dsp::fcvdspinit();
+    ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP";
+
+    cv::Mat src;
+    src.allocator = cv::fastcv::getQcAllocator();
+    cv::imread(cvtest::findDataFile("cv/detectors_descriptors_evaluation/planar/box_in_scene.png"), src, cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty()) << "Could not read the image file.";
+
+    // Use the same buffer for in-place operation
+    cv::Mat dst;
+    dst.allocator = cv::fastcv::getQcAllocator();
+    src.copyTo(dst); // dst starts as a copy of src, so src stays untouched for the reference computation
+
+    bool type = false;
+
+    // Call the thresholdOtsu function for in-place operation
+    cv::fastcv::dsp::thresholdOtsu(dst, dst, type);
+
+    // De-Initialize DSP
+    cv::fastcv::dsp::fcvdspdeinit();
+
+    // Check if the output is not empty
+    EXPECT_FALSE(dst.empty());
+    EXPECT_EQ(src.size(), dst.size());
+
+    // Compare the result against the reference cv::threshold function with Otsu's method
+    cv::Mat referenceDst;
+    cv::threshold(src, referenceDst, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);
+
+    double maxDifference = 10.0; // largest per-pixel deviation accepted
+    cv::Mat diff;
+    cv::absdiff(dst, referenceDst, diff);
+    double maxVal;
+    cv::minMaxLoc(diff, nullptr, &maxVal);
+
+    EXPECT_LE(maxVal, maxDifference) << "The in-place threshold result differs from the reference result by more than the acceptable threshold.";
+}
+
+}} // namespaces opencv_test, ::
diff --git a/modules/fastcv/test/test_warp.cpp b/modules/fastcv/test/test_warp.cpp
index 240262f93ca..72f32bda031 100644
--- a/modules/fastcv/test/test_warp.cpp
+++ b/modules/fastcv/test/test_warp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ * Copyright (c) 2024-2025 Qualcomm Innovation Center, Inc. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
 */
 
@@ -7,30 +7,19 @@
 
 namespace opencv_test { namespace {
 
-typedef testing::TestWithParam<cv::Size> WarpPerspective2Plane;
-
-TEST_P(WarpPerspective2Plane, accuracy)
+static void getInvertMatrix(Mat& src, Size dstSize, Mat& M)
 {
-    cv::Size dstSize = GetParam();
-    cv::Mat img = imread(cvtest::findDataFile("cv/shared/baboon.png"));
-    Mat src(img.rows, img.cols, CV_8UC1);
-    cvtColor(img,src,cv::COLOR_BGR2GRAY);
-    cv::Mat dst1, dst2, mat, ref1, ref2;
-    mat.create(3,3,CV_32FC1);
-    dst1.create(dstSize,CV_8UC1);
-    dst2.create(dstSize,CV_8UC1);
-
-    RNG rng = RNG((uint64)-1);
+    RNG& rng = cv::theRNG();
     Point2f s[4], d[4];
 
     s[0] = Point2f(0,0);
     d[0] = Point2f(0,0);
     s[1] = Point2f(src.cols-1.f,0);
-    d[1] = Point2f(dst1.cols-1.f,0);
+    d[1] = Point2f(dstSize.width-1.f,0);
     s[2] = Point2f(src.cols-1.f,src.rows-1.f);
-    d[2] = Point2f(dst1.cols-1.f,dst1.rows-1.f);
+    d[2] = Point2f(dstSize.width-1.f,dstSize.height-1.f);
     s[3] = Point2f(0,src.rows-1.f);
-    d[3] = Point2f(0,dst1.rows-1.f);
+    d[3] = Point2f(0,dstSize.height-1.f);
 
     float buffer[16];
     Mat tmp( 1, 16, CV_32FC1, buffer );
@@ -40,31 +29,182 @@ TEST_P(WarpPerspective2Plane, accuracy)
     {
         s[i].x += buffer[i*4]*src.cols/2;
         s[i].y += buffer[i*4+1]*src.rows/2;
-        d[i].x += buffer[i*4+2]*dst1.cols/2;
-        d[i].y += buffer[i*4+3]*dst1.rows/2;
+        d[i].x += buffer[i*4+2]*dstSize.width/2;
+        d[i].y += buffer[i*4+3]*dstSize.height/2;
     }
 
-    cv::getPerspectiveTransform( s, d ).convertTo( mat, mat.depth() );
+    cv::getPerspectiveTransform( s, d ).convertTo( M, M.depth() );
+
     // Invert the perspective matrix
-    invert(mat,mat);
+    invert(M,M);
+}
+
+// Inverts a 2x3 affine matrix [A | t].
+// The result [inv(A) | -inv(A) * t] is returned as a 2x3 CV_32F matrix.
+static cv::Mat getInverseAffine(const cv::Mat& affine)
+{
+    const cv::Rect linearPart(0, 0, 2, 2);  // columns 0-1
+    const cv::Rect shiftPart(2, 0, 1, 2);   // column 2
+
+    // Invert the left 2x2 block
+    cv::Mat invLinear;
+    cv::invert(affine(linearPart), invLinear);
+
+    // Map the last column through the negated inverse
+    cv::Mat shift = affine(shiftPart);
+    cv::Mat invShift = -invLinear * shift;
+
+    // Assemble both parts into the output matrix
+    cv::Mat result = cv::Mat::zeros(2, 3, CV_32F);
+    invLinear.copyTo(result(linearPart));
+    invShift.copyTo(result(shiftPart));
+
+    return result;
+}
+
+typedef testing::TestWithParam<cv::Size> WarpPerspective2Plane;
+// Warps the same image as both planes with fastcv and compares each output with cv::warpPerspective.
+TEST_P(WarpPerspective2Plane, accuracy)
+{
+    cv::Size dstSize = GetParam();
+    cv::Mat src = imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty()); // abort here: every later step needs the input image
+
+    cv::Mat dst1, dst2, matrix, ref1, ref2;
+    matrix.create(3, 3, CV_32FC1);
 
-    cv::fastcv::warpPerspective2Plane(src, src, dst1, dst2, mat, dstSize);
-    cv::warpPerspective(src,ref1,mat,dstSize,(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP));
-    cv::warpPerspective(src,ref2,mat,dstSize,(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP));
+    getInvertMatrix(src, dstSize, matrix);
 
-    cv::Mat difference1, difference2, mask1,mask2;
+    cv::fastcv::warpPerspective2Plane(src, src, dst1, dst2, matrix, dstSize);
+    cv::warpPerspective(src, ref1, matrix, dstSize, (cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), cv::BORDER_CONSTANT, Scalar(0));
+    cv::warpPerspective(src, ref2, matrix, dstSize, (cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), cv::BORDER_CONSTANT, Scalar(0));
+
+    cv::Mat difference1, difference2, mask1, mask2;
     cv::absdiff(dst1, ref1, difference1);
     cv::absdiff(dst2, ref2, difference2);
+
+    // Small per-pixel differences are expected because the algorithms differ; only differences above 5 are counted
     cv::threshold(difference1, mask1, 5, 255, cv::THRESH_BINARY);
     cv::threshold(difference2, mask2, 5, 255, cv::THRESH_BINARY);
     int num_diff_pixels_1 = cv::countNonZero(mask1);
     int num_diff_pixels_2 = cv::countNonZero(mask2);
 
-    EXPECT_LT(num_diff_pixels_1, src.size().area()*0.02);
-    EXPECT_LT(num_diff_pixels_2, src.size().area()*0.02);
+    // The results differ along the border, so the allowed mismatch count scales with width + height
+    EXPECT_LT(num_diff_pixels_1, (dstSize.width+dstSize.height)*5);
+    EXPECT_LT(num_diff_pixels_2, (dstSize.width+dstSize.height)*5);
+}
+
+typedef testing::TestWithParam<tuple<Size, int, int>> WarpPerspective;
+// Parameters: destination size, interpolation flag, border type.
+TEST_P(WarpPerspective, accuracy)
+{
+    cv::Size dstSize = get<0>(GetParam());
+    int interpolation = get<1>(GetParam());
+    int borderType    = get<2>(GetParam());
+    cv::Scalar borderValue = Scalar::all(100);
+
+    cv::Mat src = imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty()); // abort here: every later step needs the input image
+
+    cv::Mat dst, matrix, ref;
+    matrix.create(3, 3, CV_32FC1);
+
+    getInvertMatrix(src, dstSize, matrix);
+
+    cv::fastcv::warpPerspective(src, dst, matrix, dstSize, interpolation, borderType, borderValue);
+    cv::warpPerspective(src, ref, matrix, dstSize, (interpolation | cv::WARP_INVERSE_MAP), borderType, borderValue);
+
+    cv::Mat difference, mask;
+    cv::absdiff(dst, ref, difference);
+    cv::threshold(difference, mask, 10, 255, cv::THRESH_BINARY); // keep only differences above 10
+    int num_diff_pixels = cv::countNonZero(mask);
+    // NOTE(review): the budget is 5% of the source area although the mask is built from dst/ref; confirm intended
+    EXPECT_LT(num_diff_pixels, src.size().area()*0.05);
 }
 
+INSTANTIATE_TEST_CASE_P(FastCV_Extension, WarpPerspective,Combine(
+                   ::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // destination sizes
+                   ::testing::Values(INTER_NEAREST, INTER_LINEAR, INTER_AREA), // interpolation flags
+                   ::testing::Values(BORDER_CONSTANT, BORDER_REPLICATE, BORDER_TRANSPARENT) // border types
+));
 INSTANTIATE_TEST_CASE_P(FastCV_Extension, WarpPerspective2Plane, Values(perf::szVGA, perf::sz720p, perf::sz1080p));
 
+// Smoke test: warps the image loaded with imread's default flags using a 2x3 matrix.
+TEST(WarpAffine3ChannelTest, accuracy)
+{
+    cv::Mat src = imread(cvtest::findDataFile("cv/shared/baboon.png"));
+    ASSERT_FALSE(src.empty()); // the warp below is meaningless without an input image
+
+    // Define the transformation matrix
+    cv::Mat M = (cv::Mat_<float>(2, 3) << 2.0, 0, -50.0, 0, 2.0, -50.0);
+    cv::Size dsize(src.cols, src.rows);
+
+    cv::Mat dst;
+    cv::fastcv::warpAffine(src, dst, M, dsize);
+
+    EXPECT_FALSE(dst.empty());
+}
+
+// Warps a 100x100 ROI of the grayscale image with a 2x2 matrix and checks the patch size and type.
+TEST(WarpAffineROITest, accuracy)
+{
+    cv::Mat src = cv::imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty()); // taking the ROI below requires a loaded image
+
+    // 2x2 rotation matrix for the given angle
+    float angle = 180.0; // Rotation angle in degrees
+    float radians = angle * CV_PI / 180.0;
+    cv::Mat affine = (cv::Mat_<float>(2, 2) << cos(radians), -sin(radians), sin(radians), cos(radians));
+
+    cv::Mat patch;
+    cv::Mat roi = src(cv::Rect(0, 0, 100, 100));
+    cv::fastcv::warpAffine(roi, patch, affine, cv::Size(100, 100));
+
+    EXPECT_FALSE(patch.empty());
+    EXPECT_EQ(patch.size(), cv::Size(100, 100));
+    EXPECT_EQ(patch.type(), CV_8UC1);
+}
+
+typedef testing::TestWithParam<tuple<int, int>> WarpAffineTest;
+
+// Smoke test for fastcv::warpAffine over (interpolation, border value) pairs.
+TEST_P(WarpAffineTest, accuracy)
+{
+    // Test parameters: interpolation flag and border value
+    const int interpolation = std::get<0>(GetParam());
+    const int borderValue = std::get<1>(GetParam());
+
+    // Grayscale input image
+    cv::Mat src = cv::imread(cvtest::findDataFile("cv/shared/baboon.png"), cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(src.empty());
+
+    // Rotation by 30 degrees around (100, 100) combined with a 0.5 scale
+    const float angle = 30.0;
+    const float scale = 0.5;
+    const cv::Point2f center(100, 100);
+    cv::Mat forward = cv::getRotationMatrix2D(center, angle, scale);
+
+    // The inverted matrix is what gets handed to the fastcv call
+    cv::Mat inverseAffine = getInverseAffine(forward);
+
+    // Output has the same dimensions as the input
+    cv::Size dsize(src.cols, src.rows);
+    cv::Mat dst;
+
+    cv::fastcv::warpAffine(src, dst, inverseAffine, dsize, interpolation, borderValue);
+
+    // Only a non-empty result is required here
+    EXPECT_FALSE(dst.empty());
+}
+
+INSTANTIATE_TEST_CASE_P(
+    FastCV_Extension,
+    WarpAffineTest,
+    ::testing::Combine(
+        ::testing::Values(INTER_NEAREST, INTER_LINEAR, INTER_AREA), // interpolation flags
+        ::testing::Values(0, 255) // Border values: black and white
+    )
+);
+
 }
 }
\ No newline at end of file
diff --git a/modules/gapi/CMakeLists.txt b/modules/gapi/CMakeLists.txt
index 6757c853bd1..9f5ac94d74c 100644
--- a/modules/gapi/CMakeLists.txt
+++ b/modules/gapi/CMakeLists.txt
@@ -380,6 +380,15 @@ if(WIN32)
   ocv_target_link_libraries(${the_module} PRIVATE wsock32 ws2_32)
 endif()
 
+ocv_option(OPENCV_GAPI_MSMF "Build G-API with MS Media Foundation support" HAVE_MSMF)
+if(HAVE_MSMF AND OPENCV_GAPI_MSMF)  # requires both HAVE_MSMF and the OPENCV_GAPI_MSMF option
+  if(TARGET opencv_test_gapi)  # the test target gets the same define when it exists
+    ocv_target_compile_definitions(opencv_test_gapi PRIVATE -DHAVE_GAPI_MSMF)
+  endif()
+  ocv_target_compile_definitions(${the_module} PRIVATE -DHAVE_GAPI_MSMF)
+  ocv_target_link_libraries(${the_module} PRIVATE ocv.3rdparty.msmf)
+endif()
+
 if(HAVE_DIRECTML)
   ocv_target_compile_definitions(${the_module} PRIVATE HAVE_DIRECTML=1)
 endif()
diff --git a/modules/gapi/src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp b/modules/gapi/src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp
index becd893e095..a4eb4546f12 100644
--- a/modules/gapi/src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp
+++ b/modules/gapi/src/streaming/onevpl/demux/async_mfp_demux_data_provider.cpp
@@ -14,7 +14,9 @@ namespace cv {
 namespace gapi {
 namespace wip {
 namespace onevpl {
-#ifdef _WIN32
+
+#ifdef HAVE_GAPI_MSMF
+
 static HRESULT create_media_source(const std::string& url, IMFMediaSource **ppSource) {
     wchar_t sURL[MAX_PATH];
     GAPI_Assert(url.size() < MAX_PATH && "Windows MAX_PATH limit was reached");
@@ -785,7 +787,7 @@ bool MFPAsyncDemuxDataProvider::empty() const {
            (processing_locked_buffer_storage.size() == 0) &&
            (get_locked_buffer_size() == 0);
 }
-#else // _WIN32
+#else // HAVE_GAPI_MSMF
 
 MFPAsyncDemuxDataProvider::MFPAsyncDemuxDataProvider(const std::string&) {
     GAPI_Error("Unsupported: Microsoft Media Foundation is not available");
@@ -804,7 +806,7 @@ bool MFPAsyncDemuxDataProvider::empty() const {
     GAPI_Error("Unsupported: Microsoft Media Foundation is not available");
     return true;
 }
-#endif // _WIN32
+#endif // HAVE_GAPI_MSMF
 } // namespace onevpl
 } // namespace wip
 } // namespace gapi
diff --git a/modules/gapi/src/streaming/onevpl/demux/async_mfp_demux_data_provider.hpp b/modules/gapi/src/streaming/onevpl/demux/async_mfp_demux_data_provider.hpp
index aab237f4134..057e41e429a 100644
--- a/modules/gapi/src/streaming/onevpl/demux/async_mfp_demux_data_provider.hpp
+++ b/modules/gapi/src/streaming/onevpl/demux/async_mfp_demux_data_provider.hpp
@@ -16,7 +16,7 @@
 #include "streaming/onevpl/onevpl_export.hpp"
 #include <opencv2/gapi/streaming/onevpl/data_provider_interface.hpp>
 
-#ifdef _WIN32
+#ifdef HAVE_GAPI_MSMF
 #define NOMINMAX
 #include <mfapi.h>
 #include <mfidl.h>
@@ -104,7 +104,7 @@ struct GAPI_EXPORTS MFPAsyncDemuxDataProvider : public IDataProvider,
 } // namespace gapi
 } // namespace cv
 
-#else // _WIN32
+#else // HAVE_GAPI_MSMF
 namespace cv {
 namespace gapi {
 namespace wip {
@@ -121,6 +121,6 @@ struct GAPI_EXPORTS MFPAsyncDemuxDataProvider : public IDataProvider {
 } // namespace gapi
 } // namespace cv
 
-#endif // _WIN32
+#endif // HAVE_GAPI_MSMF
 #endif // HAVE_ONEVPL
 #endif // GAPI_STREAMING_ONEVPL_DEMUX_ASYNC_MFP_DEMUX_DATA_PROVIDER_HPP
diff --git a/modules/ml/include/opencv2/ml.hpp b/modules/ml/include/opencv2/ml.hpp
index d537ab7759b..b66a3004028 100644
--- a/modules/ml/include/opencv2/ml.hpp
+++ b/modules/ml/include/opencv2/ml.hpp
@@ -256,7 +256,7 @@ class CV_EXPORTS_W TrainData
 
     @param filename The input file name
     @param headerLineCount The number of lines in the beginning to skip; besides the header, the
-        function also skips empty lines and lines staring with `#`
+        function also skips empty lines and lines starting with `#`
     @param responseStartIdx Index of the first output variable. If -1, the function considers the
         last variable as the response
     @param responseEndIdx Index of the last output variable + 1. If -1, then there is single