diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp
index bea05328c81b..fff7cf837703 100644
--- a/sycl/include/CL/sycl/detail/pi.hpp
+++ b/sycl/include/CL/sycl/detail/pi.hpp
@@ -107,7 +107,7 @@ std::string platformInfoToString(pi_platform_info info);
 template <class To, class From> To cast(From value);
 
 // Holds the PluginInformation for the plugin that is bound.
-// Currently a global varaible is used to store OpenCL plugin information to be
+// Currently a global variable is used to store OpenCL plugin information to be
 // used with SYCL Interoperability Constructors.
 extern std::shared_ptr<plugin> GlobalPlugin;
 
diff --git a/sycl/plugins/cuda/CMakeLists.txt b/sycl/plugins/cuda/CMakeLists.txt
index bec6a2dd8ad2..ad2c646efabe 100644
--- a/sycl/plugins/cuda/CMakeLists.txt
+++ b/sycl/plugins/cuda/CMakeLists.txt
@@ -6,7 +6,8 @@ message(STATUS "Including the PI API CUDA backend.")
 
 find_package(CUDA 10.0 REQUIRED)
 
-add_library(cudadrv SHARED IMPORTED)
+# Make imported library global to use it within the project.
+add_library(cudadrv SHARED IMPORTED GLOBAL)
 
 set_target_properties(
   cudadrv PROPERTIES 
diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp
index 8a44c3ff6eb5..9dbffc9e1430 100644
--- a/sycl/plugins/cuda/pi_cuda.cpp
+++ b/sycl/plugins/cuda/pi_cuda.cpp
@@ -831,7 +831,8 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
                    pi_uint64{max_alloc});
   }
   case PI_DEVICE_INFO_IMAGE_SUPPORT: {
-    return getInfo(param_value_size, param_value, param_value_size_ret, false);
+    return getInfo(param_value_size, param_value, param_value_size_ret,
+                   PI_FALSE);
   }
   case PI_DEVICE_INFO_MAX_READ_IMAGE_ARGS: {
     return getInfo(param_value_size, param_value, param_value_size_ret, 0);
@@ -2783,6 +2784,11 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
   // PI interface supports higher version or the same version.
   strncpy(PluginInit->PluginVersion, SupportedVersion, 4);
 
+  // Set whole function table to zero to make it easier to detect if
+  // functions are not set up below.
+  std::memset(&(PluginInit->PiFunctionTable), 0,
+              sizeof(PluginInit->PiFunctionTable));
+
 // Forward calls to OpenCL RT.
 #define _PI_CL(pi_api, cuda_api)                                         \
   (PluginInit->PiFunctionTable).pi_api = (decltype(&::pi_api))(&cuda_api);
@@ -2837,6 +2843,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
   _PI_CL(piKernelRetain, cuda_piKernelRetain)
   _PI_CL(piKernelRelease, cuda_piKernelRelease)
   _PI_CL(piKernelSetExecInfo, cuda_piKernelSetExecInfo)
+
   // Event
   _PI_CL(piEventCreate, cuda_piEventCreate)
   _PI_CL(piEventGetInfo, cuda_piEventGetInfo)
@@ -2868,6 +2875,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
   _PI_CL(piEnqueueMemImageFill, cuda_piEnqueueMemImageFill)
   _PI_CL(piEnqueueMemBufferMap, cuda_piEnqueueMemBufferMap)
   _PI_CL(piEnqueueMemUnmap, cuda_piEnqueueMemUnmap)
+
   _PI_CL(piextKernelSetArgMemObj, cuda_piextKernelSetArgMemObj)
 
 #undef _PI_CL
diff --git a/sycl/test/aot/accelerator.cpp b/sycl/test/aot/accelerator.cpp
index c08d4998ff81..f64b4f743a9a 100644
--- a/sycl/test/aot/accelerator.cpp
+++ b/sycl/test/aot/accelerator.cpp
@@ -36,6 +36,7 @@ void simple_vadd(const std::array<T, N>& VA, const std::array<T, N>& VB,
           std::cerr << "Unknown async exception was caught." << std::endl;
         }
       }
+      throw "ERROR: Asynchronous exception(s)";
     });
 
   cl::sycl::range<1> numOfItems{N};
@@ -67,12 +68,12 @@ int main() {
   simple_vadd(D, E, F);
   for (unsigned int i = 0; i < array_size; i++) {
     if (C[i] != A[i] + B[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << C[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << C[i]
                 << "!\n";
       return 1;
     }
     if (F[i] != D[i] + E[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << F[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << F[i]
                 << "!\n";
       return 1;
     }
diff --git a/sycl/test/aot/cpu.cpp b/sycl/test/aot/cpu.cpp
index ab1e91de94bd..403efc7d6ead 100644
--- a/sycl/test/aot/cpu.cpp
+++ b/sycl/test/aot/cpu.cpp
@@ -36,6 +36,7 @@ void simple_vadd(const std::array<T, N>& VA, const std::array<T, N>& VB,
           std::cerr << "Unknown async exception was caught." << std::endl;
         }
       }
+      throw "ERROR: Asynchronous exception(s)";
     });
 
   cl::sycl::range<1> numOfItems{N};
@@ -67,12 +68,12 @@ int main() {
   simple_vadd(D, E, F);
   for (unsigned int i = 0; i < array_size; i++) {
     if (C[i] != A[i] + B[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << C[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << C[i]
                 << "!\n";
       return 1;
     }
     if (F[i] != D[i] + E[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << F[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << F[i]
                 << "!\n";
       return 1;
     }
diff --git a/sycl/test/aot/gpu.cpp b/sycl/test/aot/gpu.cpp
index ee81bba76814..e1d045659941 100644
--- a/sycl/test/aot/gpu.cpp
+++ b/sycl/test/aot/gpu.cpp
@@ -1,9 +1,11 @@
 // REQUIRES: ocloc, gpu
+// UNSUPPORTED: cuda
+// CUDA is not compatible with SPIR.
 
 // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice "-device skl" %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
-// XFAIL: cuda
+
 //==----- gpu.cpp - AOT compilation for gen devices using GEN compiler  ------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -36,6 +38,7 @@ void simple_vadd(const std::array<T, N>& VA, const std::array<T, N>& VB,
           std::cerr << "Unknown async exception was caught." << std::endl;
         }
       }
+      throw "ERROR: Asynchronous exception(s)";
     });
 
   cl::sycl::range<1> numOfItems{N};
@@ -67,12 +70,12 @@ int main() {
   simple_vadd(D, E, F);
   for (unsigned int i = 0; i < array_size; i++) {
     if (C[i] != A[i] + B[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << C[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << C[i]
                 << "!\n";
       return 1;
     }
     if (F[i] != D[i] + E[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << F[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << F[i]
                 << "!\n";
       return 1;
     }
diff --git a/sycl/test/aot/multiple-devices.cpp b/sycl/test/aot/multiple-devices.cpp
index d516c5cd443b..f7ceff094ef2 100644
--- a/sycl/test/aot/multiple-devices.cpp
+++ b/sycl/test/aot/multiple-devices.cpp
@@ -7,6 +7,8 @@
 //===------------------------------------------------------------------------===//
 
 // REQUIRES: opencl-aot, ocloc, aoc, cpu, gpu, accelerator
+// UNSUPPORTED: cuda
+// CUDA is not compatible with SPIR.
 
 // 1-command compilation case
 // Targeting CPU, GPU, FPGA
@@ -88,6 +90,7 @@ void simple_vadd(const std::array<T, N>& VA, const std::array<T, N>& VB,
           std::cerr << "Unknown async exception was caught." << std::endl;
         }
       }
+      throw "ERROR: Asynchronous exception(s)";
     });
 
   cl::sycl::range<1> numOfItems{N};
@@ -119,12 +122,12 @@ int main() {
   simple_vadd(D, E, F);
   for (unsigned int i = 0; i < array_size; i++) {
     if (C[i] != A[i] + B[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << C[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << C[i]
                 << "!\n";
       return 1;
     }
     if (F[i] != D[i] + E[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << F[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << F[i]
                 << "!\n";
       return 1;
     }
diff --git a/sycl/test/aot/with-llvm-bc.cpp b/sycl/test/aot/with-llvm-bc.cpp
index afff5546dac3..f97284dde6e3 100644
--- a/sycl/test/aot/with-llvm-bc.cpp
+++ b/sycl/test/aot/with-llvm-bc.cpp
@@ -40,6 +40,7 @@ void simple_vadd(const std::array<T, N>& VA, const std::array<T, N>& VB,
           std::cerr << "Unknown async exception was caught." << std::endl;
         }
       }
+      throw "ERROR: Asynchronous exception(s)";
     });
   
   cl::sycl::range<1> numOfItems{N};
@@ -71,12 +72,12 @@ int main() {
   simple_vadd(D, E, F);
   for (unsigned int i = 0; i < array_size; i++) {
     if (C[i] != A[i] + B[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << C[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << C[i]
                 << "!\n";
       return 1;
     }
     if (F[i] != D[i] + E[i]) {
-      std::cout << "The results are incorrect (element " << i << " is " << F[i]
+      std::cerr << "The results are incorrect (element " << i << " is " << F[i]
                 << "!\n";
       return 1;
     }
diff --git a/sycl/test/basic_tests/accessor/accessor.cpp b/sycl/test/basic_tests/accessor/accessor.cpp
index a769df2f6300..903ceec4dff5 100644
--- a/sycl/test/basic_tests/accessor/accessor.cpp
+++ b/sycl/test/basic_tests/accessor/accessor.cpp
@@ -11,8 +11,8 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "../../helpers.hpp"
 #include <CL/sycl.hpp>
-#include <cassert>
 
 namespace sycl {
 using namespace cl::sycl;
@@ -77,19 +77,19 @@ int main() {
     auto acc_src = buf_src.get_access<sycl::access::mode::read>();
     auto acc_dst = buf_dst.get_access<sycl::access::mode::read_write>();
 
-    assert(!acc_src.is_placeholder());
-    assert(acc_src.get_size() == sizeof(src));
-    assert(acc_src.get_count() == 2);
-    assert(acc_src.get_range() == sycl::range<1>(2));
+    CHECK(!acc_src.is_placeholder());
+    CHECK(acc_src.get_size() == sizeof(src));
+    CHECK(acc_src.get_count() == 2);
+    CHECK(acc_src.get_range() == sycl::range<1>(2));
 
     // Make sure that operator[] is defined for both size_t and id<1>.
     // Implicit conversion from IdxSzT to size_t guarantees that no
     // implicit conversion from size_t to id<1> will happen.
-    assert(acc_src[IdxSzT(0)] + acc_src[IdxID1(1)] == 10);
+    CHECK(acc_src[IdxSzT(0)] + acc_src[IdxID1(1)] == 10);
 
     acc_dst[0] = acc_src[0] + acc_src[IdxID1(0)];
     acc_dst[id1] = acc_src[1] + acc_src[IdxSzT(1)];
-    assert(dst[0] == 6 && dst[1] == 14);
+    CHECK(dst[0] == 6 && dst[1] == 14);
   }
 
   // Three-dimensional host accessor.
@@ -101,10 +101,10 @@ int main() {
       sycl::buffer<int, 3> buf(data, sycl::range<3>(2, 3, 4));
       auto acc = buf.get_access<sycl::access::mode::read_write>();
 
-      assert(!acc.is_placeholder());
-      assert(acc.get_size() == sizeof(data));
-      assert(acc.get_count() == 24);
-      assert(acc.get_range() == sycl::range<3>(2, 3, 4));
+      CHECK(!acc.is_placeholder());
+      CHECK(acc.get_size() == sizeof(data));
+      CHECK(acc.get_count() == 24);
+      CHECK(acc.get_range() == sycl::range<3>(2, 3, 4));
 
       for (int i = 0; i < 2; ++i)
         for (int j = 0; j < 3; ++j)
@@ -112,7 +112,7 @@ int main() {
             acc[IdxID3(i, j, k)] += acc[sycl::id<3>(i, j, k)];
     }
     for (int i = 0; i < 24; ++i) {
-      assert(data[i] == 2 * i);
+      CHECK(data[i] == 2 * i);
     }
   }
   int data = 5;
@@ -125,16 +125,16 @@ int main() {
 
     Queue.submit([&](sycl::handler &cgh) {
       auto acc = buf.get_access<sycl::access::mode::read_write>(cgh);
-      assert(!acc.is_placeholder());
-      assert(acc.get_size() == sizeof(int));
-      assert(acc.get_count() == 1);
-      assert(acc.get_range() == sycl::range<1>(1));
+      CHECK(!acc.is_placeholder());
+      CHECK(acc.get_size() == sizeof(int));
+      CHECK(acc.get_count() == 1);
+      CHECK(acc.get_range() == sycl::range<1>(1));
       cgh.single_task<class kernel>(
           [=]() { acc[IdxSzT(0)] += acc[IdxID1(0)]; });
     });
     Queue.wait();
   }
-  assert(data == 10);
+  CHECK(data == 10);
 
   // Device accessor with 2-dimensional subscript operators.
   {
@@ -158,7 +158,7 @@ int main() {
         for (int j = 0; j < 3; j++) {
           std::cout << "array[" << i << "][" << j << "]=" << array[i][j]
                     << std::endl;
-          assert(array[i][j] == i * 3 + j);
+          CHECK(array[i][j] == i * 3 + j);
         }
       }
     }
@@ -188,7 +188,7 @@ int main() {
           for (int k = 0; k < 4; k++) {
             std::cout << "array[" << i << "][" << j << "][" << k
                       << "]=" << array[i][j][k] << std::endl;
-            assert(array[i][j][k] == k + 4 * (j + 3 * i));
+            CHECK(array[i][j][k] == k + 4 * (j + 3 * i));
           }
         }
       }
@@ -211,11 +211,11 @@ int main() {
 
       auto host_acc = buf.get_access<sycl::access::mode::read>();
       for (int i = 0; i != 3; ++i)
-        assert(host_acc[i] == 42);
+        CHECK(host_acc[i] == 42);
 
     } catch (cl::sycl::exception e) {
-      std::cout << "SYCL exception caught: " << e.what();
-      return 1;
+      std::cerr << "SYCL exception caught: " << e.what();
+      throw;
     }
   }
 
@@ -236,8 +236,8 @@ int main() {
       auto host_acc =
         buf.get_access<sycl::access::mode::discard_read_write>();
     } catch (cl::sycl::exception e) {
-      std::cout << "SYCL exception caught: " << e.what();
-      return 1;
+      std::cerr << "SYCL exception caught: " << e.what();
+      throw;
     }
   }
 
@@ -262,7 +262,7 @@ int main() {
       }
       for (int i = 0; i < 10; i++) {
         std::cout << "array[" << i << "]=" << array[i] << std::endl;
-        assert(array[i] == 333);
+        CHECK(array[i] == 333);
       }
     }
   }
@@ -296,8 +296,8 @@ int main() {
       for (int i = 0; i < 10; i++) {
         std::cout << "array1[" << i << "]=" << array1[i] << std::endl;
         std::cout << "array2[" << i << "]=" << array2[i] << std::endl;
-        assert(array1[i] == 333);
-        assert(array2[i] == 666);
+        CHECK(array1[i] == 333);
+        CHECK(array2[i] == 666);
       }
     }
   }
@@ -326,7 +326,7 @@ int main() {
       }
       for (int i = 0; i < 10; i++) {
         std::cout << "array[" << i << "]=" << array[i] << std::endl;
-        assert(array[i] == 333);
+        CHECK(array[i] == 333);
       }
     }
   }
@@ -349,11 +349,11 @@ int main() {
 
       auto host_acc = buf.get_access<sycl::access::mode::read>();
       for (int i = 0; i != 3; ++i)
-        assert(host_acc[i] == 42);
+        CHECK(host_acc[i] == 42);
 
     } catch (cl::sycl::exception e) {
-      std::cout << "SYCL exception caught: " << e.what();
-      return 1;
+      std::cerr << "SYCL exception caught: " << e.what();
+      throw;
     }
   }
 
@@ -374,10 +374,10 @@ int main() {
           });
         });
       }
-      assert(data == 399);
+      CHECK(data == 399);
     } catch (sycl::exception e) {
-      std::cout << "SYCL exception caught: " << e.what();
-      return 1;
+      std::cerr << "SYCL exception caught: " << e.what();
+      throw;
     }
   }
 
@@ -424,8 +424,8 @@ int main() {
                    sycl::access::target::host_buffer>
         acc6(buf3, sycl::range<1>(1));
 
-    assert(acc4 == 2);
-    assert(acc5[0] == 4);
-    assert(acc6[0] == 6);
+    CHECK(acc4 == 2);
+    CHECK(acc5[0] == 4);
+    CHECK(acc6[0] == 6);
   }
 }
diff --git a/sycl/test/basic_tests/buffer/buffer_full_copy.cpp b/sycl/test/basic_tests/buffer/buffer_full_copy.cpp
index f729f8d6d96a..37b80bf039ce 100644
--- a/sycl/test/basic_tests/buffer/buffer_full_copy.cpp
+++ b/sycl/test/basic_tests/buffer/buffer_full_copy.cpp
@@ -1,3 +1,6 @@
+// XFAIL: cuda
+// TODO: Fix fail for CUDA.
+//
 // RUN: %clangxx %s -o %t1.out -lsycl
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %clangxx -fsycl  -fsycl-targets=%sycl_triple  %s -o %t2.out
@@ -6,9 +9,6 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t2.out
 // RUN: %ACC_RUN_PLACEHOLDER %t2.out
 
-// TODO: cuda_piEnqueueMemBufferCopy not implemented
-// XFAIL: cuda
-
 //==------------- buffer_full_copy.cpp - SYCL buffer basic test ------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -16,7 +16,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
+#include "../../helpers.hpp"
 #include <CL/sycl.hpp>
 #include <cassert>
 
@@ -50,9 +50,9 @@ void check_copy_device_to_host(cl::sycl::queue &Queue) {
     for (int i = 0; i < size; ++i) {
       for (int j = 0; j < size; ++j)
         if (offset <= i && i < offset + 2 && offset <= j && j < offset + 2) {
-          assert(acc[i][j] == 15);
+          CHECK(acc[i][j] == 15);
         } else {
-          assert(acc[i][j] == 13);
+          CHECK(acc[i][j] == 13);
         }
     }
   }
@@ -84,7 +84,7 @@ void check_fill(cl::sycl::queue &Queue) {
   {
     auto acc_1 = buf_1.get_access<cl::sycl::access::mode::read>();
     for (int i = 0; i < size; ++i)
-      assert(expected_res_1[i] == acc_1[i]);
+      CHECK(expected_res_1[i] == acc_1[i]);
   }
 }
 
@@ -121,10 +121,10 @@ void check_copy_host_to_device(cl::sycl::queue &Queue) {
 
     // check that there was no data corruption/loss
     for (int i = 0; i < size; ++i)
-      assert(expected_res_1[i] == acc_1[i]);
+      CHECK(expected_res_1[i] == acc_1[i]);
 
     for (int i = 0; i < size / 2; ++i)
-      assert(expected_res_2[i] == acc_2[i]);
+      CHECK(expected_res_2[i] == acc_2[i]);
   }
 
   cl::sycl::buffer<float, 2> buf_3({size, size});
@@ -164,24 +164,21 @@ void check_copy_host_to_device(cl::sycl::queue &Queue) {
     // check that there was no data corruption/loss
     for (int i = 0; i < size; ++i) {
       for (int j = 0; j < size; ++j)
-        assert(expected_res_3[i * size + j] == acc_1[i][j]);
+        CHECK(expected_res_3[i * size + j] == acc_1[i][j]);
     }
 
     for (int i = 0; i < size / 2; ++i)
       for (int j = 0; j < size / 2; ++j)
-        assert(expected_res_4[i * size / 2 + j] == acc_2[i][j]);
+        CHECK(expected_res_4[i * size / 2 + j] == acc_2[i][j]);
   }
 }
 
 int main() {
-  try {
-    cl::sycl::queue Queue;
-    check_copy_host_to_device(Queue);
-    check_copy_device_to_host(Queue);
-    check_fill(Queue);
-  } catch (cl::sycl::exception &ex) {
-    std::cerr << ex.what() << std::endl;
-  }
+  // Not catching exceptions to make test fail instead.
+  cl::sycl::queue Queue;
+  check_copy_host_to_device(Queue);
+  check_copy_device_to_host(Queue);
+  check_fill(Queue);
 
   return 0;
 }
diff --git a/sycl/test/basic_tests/buffer/reinterpret.cpp b/sycl/test/basic_tests/buffer/reinterpret.cpp
index 627371095a8a..74944af71b1f 100644
--- a/sycl/test/basic_tests/buffer/reinterpret.cpp
+++ b/sycl/test/basic_tests/buffer/reinterpret.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../../helpers.hpp"
 #include <CL/sycl.hpp>
-
 #include <climits>
 
 // This tests verifies basic cases of using cl::sycl::buffer::reinterpret
@@ -69,8 +69,8 @@ int main() {
   {
     auto acc = buf_i.get_access<cl::sycl::access::mode::read>();
     if (acc[0] != UINT_MAX) {
-      std::cout << acc[0] << std::endl;
-      std::cout << "line: " << __LINE__ << " array[" << 0 << "] is " << acc[0]
+      std::cerr << acc[0] << std::endl;
+      std::cerr << "line: " << __LINE__ << " array[" << 0 << "] is " << acc[0]
                 << " expected " << UINT_MAX << std::endl;
       failed = true;
     }
@@ -97,7 +97,7 @@ int main() {
     for (auto i = 0u; i < r1d.size(); i++) {
       size_t expected = (i % 4) ? 0 : 1;
       if (acc[i] != expected) {
-        std::cout << "line: " << __LINE__ << " array[" << i << "] is " << acc[i]
+        std::cerr << "line: " << __LINE__ << " array[" << i << "] is " << acc[i]
                   << " expected " << expected << std::endl;
         failed = true;
       }
@@ -130,7 +130,7 @@ int main() {
         cl::sycl::id<1>{offset}, cl::sycl::range<1>{sub_buf_size}, val);
 
     for (std::size_t i = 0; i < sub_buf_size + offset; ++i) {
-      assert(data[i] == expected_data[i] &&
+      CHECK(data[i] == expected_data[i] &&
              "1D sub buffer int->char reinterpret failed");
     }
   }
@@ -153,7 +153,7 @@ int main() {
         cl::sycl::id<1>{offset}, cl::sycl::range<1>{sub_buf_size}, val);
 
     for (std::size_t i = 0; i < sub_buf_size + offset; ++i) {
-      assert(data[i] == expected_data[i] &&
+      CHECK(data[i] == expected_data[i] &&
              "1D sub buffer char->int reinterpret failed");
     }
   }
@@ -199,7 +199,7 @@ int main() {
 
     for (std::size_t i = 0; i < rows; ++i)
       for (std::size_t j = 0; j < cols; ++j)
-        assert(data[i * cols + j] == expected_data[i * cols + j] &&
+        CHECK(data[i * cols + j] == expected_data[i * cols + j] &&
                "2D->1D->sub buffer reinterpret failed");
   }
 
@@ -227,7 +227,7 @@ int main() {
 
     for (std::size_t i = 0; i < buf_row; ++i)
       for (std::size_t j = 0; j < buf_col; ++j)
-        assert(data[i * buf_col + j] == expected_data[i * buf_col + j] &&
+        CHECK(data[i * buf_col + j] == expected_data[i * buf_col + j] &&
                "2D sub buffer int->char reinterpret failed");
   }
 
@@ -252,8 +252,8 @@ int main() {
 
     for (std::size_t i = 0; i < buf_row; ++i)
       for (std::size_t j = 0; j < buf_col; ++j)
-        assert(data[i * buf_col + j] == expected_data[i * buf_col + j] &&
-               "2D sub buffer int->char reinterpret failed");
+        CHECK(data[i * buf_col + j] == expected_data[i * buf_col + j] &&
+              "2D sub buffer int->char reinterpret failed");
   }
 
   return failed;
diff --git a/sycl/test/basic_tests/buffer/subbuffer.cpp b/sycl/test/basic_tests/buffer/subbuffer.cpp
index abd821deb8ff..6fecb0ed8c35 100644
--- a/sycl/test/basic_tests/buffer/subbuffer.cpp
+++ b/sycl/test/basic_tests/buffer/subbuffer.cpp
@@ -1,10 +1,11 @@
+// XFAIL: cuda
+// TODO: Fix fail for CUDA.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// XFAIL: cuda
-// TODO: cuda fail due to unimplemented param_name 4121 in cuda_piDeviceGetInfo
 
 //==---------- subbuffer.cpp --- sub-buffer basic test ---------------------==//
 //
@@ -18,6 +19,7 @@
 // 1) Correct results after usage of different type of accessors to sub buffer
 // 2) Exceptions if we trying to create sub buffer not according to spec
 
+#include "../../helpers.hpp"
 #include <CL/sycl.hpp>
 #include <iostream>
 #include <numeric>
@@ -50,12 +52,12 @@ void checkHostAccessor(cl::sycl::queue &q) {
     {
       auto host_acc = subbuf.get_access<cl::sycl::access::mode::read>();
       for (int i = 0; i < 10; ++i)
-        assert(host_acc[i] == ((size / 2 + i) * -100) &&
-               "Sub buffer host accessor test failed.");
+        CHECK(host_acc[i] == ((size / 2 + i) * -100) &&
+              "Sub buffer host accessor test failed.");
     }
   }
-  assert(data[0] == 0 && data[size - 1] == (size - 1) &&
-         data[size / 2] == (size / 2 * -100) && "Loss of data");
+  CHECK(data[0] == 0 && data[size - 1] == (size - 1) &&
+        data[size / 2] == (size / 2 * -100) && "Loss of data");
 }
 
 void check1DSubBuffer(cl::sycl::queue &q) {
@@ -105,16 +107,16 @@ void check1DSubBuffer(cl::sycl::queue &q) {
 
   } catch (const cl::sycl::exception &e) {
     std::cerr << e.what() << std::endl;
-    assert(false && "Exception was caught");
+    CHECK(false && "Exception was caught");
   }
 
   for (int i = offset; i < subbuf_size; ++i)
-    assert(vec[i] == (i > 34 ? i * 10 : i * -10) &&
-           "Invalid result in 1d sub buffer");
+    CHECK(vec[i] == (i > 34 ? i * 10 : i * -10) &&
+          "Invalid result in 1d sub buffer");
 
   for (int i = 0; i < subbuf_size; ++i)
-    assert(vec2[i] == (i < 3 ? (32 + i) : (32 + i) * -1) &&
-           "Invalid result in 1d sub buffer");
+    CHECK(vec2[i] == (i < 3 ? (32 + i) : (32 + i) * -1) &&
+          "Invalid result in 1d sub buffer");
 }
 
 void checkExceptions() {
@@ -127,7 +129,7 @@ void checkExceptions() {
   try {
     cl::sycl::buffer<int, 2> sub_buf{buf2d, /*offset*/ cl::sycl::range<2>{2, 0},
                                      /*size*/ cl::sycl::range<2>{2, 2}};
-    assert(!"non contiguous region exception wasn't caught");
+    CHECK(!"non contiguous region exception wasn't caught");
   } catch (const cl::sycl::invalid_object_error &e) {
     std::cerr << e.what() << std::endl;
   }
@@ -135,7 +137,7 @@ void checkExceptions() {
   try {
     cl::sycl::buffer<int, 2> sub_buf{buf2d, /*offset*/ cl::sycl::range<2>{2, 2},
                                      /*size*/ cl::sycl::range<2>{2, 6}};
-    assert(!"non contiguous region exception wasn't caught");
+    CHECK(!"non contiguous region exception wasn't caught");
   } catch (const cl::sycl::invalid_object_error &e) {
     std::cerr << e.what() << std::endl;
   }
@@ -144,7 +146,7 @@ void checkExceptions() {
     cl::sycl::buffer<int, 3> sub_buf{buf3d,
                                      /*offset*/ cl::sycl::range<3>{0, 2, 1},
                                      /*size*/ cl::sycl::range<3>{1, 2, 3}};
-    assert(!"non contiguous region exception wasn't caught");
+    CHECK(!"non contiguous region exception wasn't caught");
   } catch (const cl::sycl::invalid_object_error &e) {
     std::cerr << e.what() << std::endl;
   }
@@ -153,7 +155,7 @@ void checkExceptions() {
     cl::sycl::buffer<int, 3> sub_buf{buf3d,
                                      /*offset*/ cl::sycl::range<3>{0, 0, 0},
                                      /*size*/ cl::sycl::range<3>{2, 3, 4}};
-    assert(!"non contiguous region exception wasn't caught");
+    CHECK(!"non contiguous region exception wasn't caught");
   } catch (const cl::sycl::invalid_object_error &e) {
     std::cerr << e.what() << std::endl;
   }
@@ -162,7 +164,7 @@ void checkExceptions() {
   try {
     cl::sycl::buffer<int, 2> sub_buf{buf2d, /*offset*/ cl::sycl::range<2>{2, 2},
                                      /*size*/ cl::sycl::range<2>{2, 8}};
-    assert(!"out of bounds exception wasn't caught");
+    CHECK(!"out of bounds exception wasn't caught");
   } catch (const cl::sycl::invalid_object_error &e) {
     std::cerr << e.what() << std::endl;
   }
@@ -171,7 +173,7 @@ void checkExceptions() {
     cl::sycl::buffer<int, 3> sub_buf{buf3d,
                                      /*offset*/ cl::sycl::range<3>{1, 1, 1},
                                      /*size*/ cl::sycl::range<3>{1, 1, 4}};
-    assert(!"out of bounds exception wasn't caught");
+    CHECK(!"out of bounds exception wasn't caught");
   } catch (const cl::sycl::invalid_object_error &e) {
     std::cerr << e.what() << std::endl;
   }
@@ -180,7 +182,7 @@ void checkExceptions() {
     cl::sycl::buffer<int, 3> sub_buf{buf3d,
                                      /*offset*/ cl::sycl::range<3>{3, 3, 0},
                                      /*size*/ cl::sycl::range<3>{1, 2, 4}};
-    assert(!"out of bounds exception wasn't caught");
+    CHECK(!"out of bounds exception wasn't caught");
   } catch (const cl::sycl::invalid_object_error &e) {
     std::cerr << e.what() << std::endl;
   }
@@ -191,7 +193,7 @@ void checkExceptions() {
                                      /*size*/ cl::sycl::range<2>{2, 8}};
     cl::sycl::buffer<int, 2> sub_sub_buf(sub_buf, cl::sycl::range<2>{0, 0},
                                          /*size*/ cl::sycl::range<2>{0, 0});
-    assert(!"invalid subbuffer exception wasn't caught");
+    CHECK(!"invalid subbuffer exception wasn't caught");
   } catch (const cl::sycl::invalid_object_error &e) {
     std::cerr << e.what() << std::endl;
   }
@@ -240,13 +242,13 @@ void copyBlock() {
       auto *V = BlockB.get_access<mode::read>().get_pointer();
 
       for (size_t Idx2 = 0; Idx2 < BlockSize; ++Idx2) {
-        assert(V[Idx2] == Idx2 + BlockSize * Idx &&
-               "Invalid data in block buffer");
+        CHECK(V[Idx2] == Idx2 + BlockSize * Idx &&
+              "Invalid data in block buffer");
       }
     }
   }
   catch (cl::sycl::exception& ex) {
-    assert(false && "Unexpected exception captured!");
+    CHECK(false && "Unexpected exception captured!");
   }
 }
 
@@ -272,7 +274,7 @@ void checkMultipleContexts() {
           sycl::range<1>(N / 2), [=](sycl::id<1> idx) { bufacc[idx[0]] = 2; });
     });
   }
-  assert(a[N / 2 - 1] == 1 && a[N / 2] == 2 && "Sub buffer data loss");
+  CHECK(a[N / 2 - 1] == 1 && a[N / 2] == 2 && "Sub buffer data loss");
 }
 
 int main() {
diff --git a/sycl/test/basic_tests/buffer/subbuffer_interop.cpp b/sycl/test/basic_tests/buffer/subbuffer_interop.cpp
index 092eda64f7df..ae1fa0aa8c60 100644
--- a/sycl/test/basic_tests/buffer/subbuffer_interop.cpp
+++ b/sycl/test/basic_tests/buffer/subbuffer_interop.cpp
@@ -1,10 +1,12 @@
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
-// REQUIRES: opencl
-
 //==------------ subbuffer_interop.cpp - SYCL buffer basic test ------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -13,9 +15,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../../helpers.hpp"
 #include <CL/sycl.hpp>
-
-#include <cassert>
 #include <memory>
 #include <numeric>
 
@@ -104,19 +105,20 @@ int main() {
       clReleaseKernel(clKernel);
       clReleaseProgram(clProgram);
     } catch (exception &ex) {
-      std::cout << ex.what() << std::endl;
+      std::cerr << ex.what() << std::endl;
+      throw;
     }
 
     for (int i = 0; i < NSize; ++i) {
       if (i < NSize / 2 && AMem[i] != i) {
         std::cout << " array[" << i << "] is " << AMem[i] << " expected " << i
                   << std::endl;
-        assert(false);
+        CHECK(false);
         Failed = true;
       } else if (i >= NSize / 2 && AMem[i] != 0) {
         std::cout << " array[" << i << "] is " << AMem[i] << " expected " << 0
                   << std::endl;
-        assert(false);
+        CHECK(false);
         Failed = true;
       }
     }
@@ -172,29 +174,30 @@ int main() {
       clReleaseKernel(clKernel);
       clReleaseProgram(clProgram);
     } catch (exception &ex) {
-      std::cout << ex.what() << std::endl;
+      std::cerr << ex.what() << std::endl;
+      throw;
     }
 
     for (int i = 0; i < NSize; ++i) {
       if (i < NSize / 4 && AMem[i] != 0) {
         std::cout << " array[" << i << "] is " << AMem[i] << " expected " << 0
                   << std::endl;
-        assert(false);
+        CHECK(false);
         Failed = true;
       } else if (i >= NSize / 4 && i < 2 * NSize / 4 && AMem[i] != i) {
         std::cout << " array[" << i << "] is " << AMem[i] << " expected " << i
                   << std::endl;
-        assert(false);
+        CHECK(false);
         Failed = true;
       } else if (i >= 2 * NSize / 4 && i < 3 * NSize / 4 && AMem[i] != 0) {
         std::cout << " array[" << i << "] is " << AMem[i] << " expected " << 0
                   << std::endl;
-        assert(false);
+        CHECK(false);
         Failed = true;
       } else if (i >= 3 * NSize / 4 && AMem[i] != i) {
         std::cout << " array[" << i << "] is " << AMem[i] << " expected " << i
                   << std::endl;
-        assert(false);
+        CHECK(false);
         Failed = true;
       }
     }
@@ -262,24 +265,25 @@ int main() {
       clReleaseKernel(clKernel2);
       clReleaseProgram(clProgram);
     } catch (exception &ex) {
-      std::cout << ex.what() << std::endl;
+      std::cerr << ex.what() << std::endl;
+      throw;
     }
 
     for (int i = 0; i < NSize; ++i) {
       if (i < NSize / 4 && AMem[i] != 0) {
         std::cout << " array[" << i << "] is " << AMem[i] << " expected " << 0
                   << std::endl;
-        assert(false);
+        CHECK(false);
         Failed = true;
       } else if (i >= NSize / 4 && i < 2 * NSize / 4 && AMem[i] != 1) {
         std::cout << " array[" << i << "] is " << AMem[i] << " expected " << i
                   << std::endl;
-        assert(false);
+        CHECK(false);
         Failed = true;
       } else if (i >= 2 * NSize / 4 && AMem[i] != i) {
         std::cout << " array[" << i << "] is " << AMem[i] << " expected " << i
                   << std::endl;
-        assert(false);
+        CHECK(false);
         Failed = true;
       }
     }
@@ -317,7 +321,7 @@ int main() {
     {
       auto host_acc = subbuf_copy->get_access<cl::sycl::access::mode::read>();
       std::cout << "On host: offset = " << host_acc[0] << std::endl;
-      assert(host_acc[0] == 256 && "Invalid subbuffer origin");
+      CHECK(host_acc[0] == 256 && "Invalid subbuffer origin");
     }
 
     Q.submit([&](cl::sycl::handler &cgh) {
@@ -331,7 +335,7 @@ int main() {
     {
       auto host_acc = subbuf_copy->get_access<cl::sycl::access::mode::read>();
       std::cout << "On host: offset = " << host_acc[0] << std::endl;
-      assert(host_acc[0] == 256 * 3 && "Invalid subbuffer origin");
+      CHECK(host_acc[0] == 256 * 3 && "Invalid subbuffer origin");
     }
   }
 
diff --git a/sycl/test/basic_tests/context.cpp b/sycl/test/basic_tests/context.cpp
index 8f40ee2be284..cc422143a095 100644
--- a/sycl/test/basic_tests/context.cpp
+++ b/sycl/test/basic_tests/context.cpp
@@ -9,6 +9,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 #include <iostream>
 
@@ -19,6 +20,7 @@ int main() {
     context c;
   } catch (device_error e) {
     std::cout << "Failed to create device for context" << std::endl;
+    throw;
   }
 
   auto devices = device::get_devices();
@@ -29,10 +31,10 @@ int main() {
     context Context(deviceA);
     size_t hash = hash_class<context>()(Context);
     context MovedContext(std::move(Context));
-    assert(hash == hash_class<context>()(MovedContext));
-    assert(deviceA.is_host() == MovedContext.is_host());
+    CHECK(hash == hash_class<context>()(MovedContext));
+    CHECK(deviceA.is_host() == MovedContext.is_host());
     if (!deviceA.is_host()) {
-      assert(MovedContext.get() != nullptr);
+      CHECK(MovedContext.get() != nullptr);
     }
   }
   {
@@ -41,10 +43,10 @@ int main() {
     size_t hash = hash_class<context>()(Context);
     context WillMovedContext(deviceB);
     WillMovedContext = std::move(Context);
-    assert(hash == hash_class<context>()(WillMovedContext));
-    assert(deviceA.is_host() == WillMovedContext.is_host());
+    CHECK(hash == hash_class<context>()(WillMovedContext));
+    CHECK(deviceA.is_host() == WillMovedContext.is_host());
     if (!deviceA.is_host()) {
-      assert(WillMovedContext.get() != nullptr);
+      CHECK(WillMovedContext.get() != nullptr);
     }
   }
   {
@@ -52,10 +54,10 @@ int main() {
     context Context(deviceA);
     size_t hash = hash_class<context>()(Context);
     context ContextCopy(Context);
-    assert(hash == hash_class<context>()(Context));
-    assert(hash == hash_class<context>()(ContextCopy));
-    assert(Context == ContextCopy);
-    assert(Context.is_host() == ContextCopy.is_host());
+    CHECK(hash == hash_class<context>()(Context));
+    CHECK(hash == hash_class<context>()(ContextCopy));
+    CHECK(Context == ContextCopy);
+    CHECK(Context.is_host() == ContextCopy.is_host());
   }
   {
     std::cout << "copy assignment operator" << std::endl;
@@ -63,9 +65,11 @@ int main() {
     size_t hash = hash_class<context>()(Context);
     context WillContextCopy(deviceB);
     WillContextCopy = Context;
-    assert(hash == hash_class<context>()(Context));
-    assert(hash == hash_class<context>()(WillContextCopy));
-    assert(Context == WillContextCopy);
-    assert(Context.is_host() == WillContextCopy.is_host());
+    CHECK(hash == hash_class<context>()(Context));
+    CHECK(hash == hash_class<context>()(WillContextCopy));
+    CHECK(Context == WillContextCopy);
+    CHECK(Context.is_host() == WillContextCopy.is_host());
   }
+
+  return 0;
 }
diff --git a/sycl/test/basic_tests/device_event.cpp b/sycl/test/basic_tests/device_event.cpp
index 79231031d8e5..9903170e4406 100644
--- a/sycl/test/basic_tests/device_event.cpp
+++ b/sycl/test/basic_tests/device_event.cpp
@@ -63,9 +63,10 @@ int test_strideN(size_t stride) {
         try {
           std::rethrow_exception(ep);
         } catch (std::exception& e) {
-          std::cout << e.what();
+          std::cerr << e.what();
         }
       }
+      throw "ERROR: Asynchronous exception(s)";
     });
 
     buffer<int, 1> out_buf(out_data, range<1>(nElems));
@@ -108,8 +109,8 @@ int test_strideN(size_t stride) {
     });
 
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
-    return 2;
+    std::cerr << "SYCL exception caught: " << e.what();
+    throw;
   }
 
   return check_results(out_data, stride);
diff --git a/sycl/test/basic_tests/diagnostics/handler.cpp b/sycl/test/basic_tests/diagnostics/handler.cpp
index 38685ece5256..322e71111c8c 100644
--- a/sycl/test/basic_tests/diagnostics/handler.cpp
+++ b/sycl/test/basic_tests/diagnostics/handler.cpp
@@ -7,7 +7,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
+#include "../../helpers.hpp"
 #include <CL/sycl.hpp>
 
 using namespace cl;
@@ -30,8 +30,12 @@ int main() {
       CGH.single_task<class Dummy2>([]() {});
     });
     Queue.throw_asynchronous();
-  } catch (sycl::exception &E) {
+    CHECK(!"Expected exception not caught");
+  } catch (sycl::exception &ExpectedException) {
     // CHECK: Attempt to set multiple actions for the command group
-    std::cout << E.what() << std::endl;
+    // Using std::cout as input for FileCheck.
+    std::cout << ExpectedException.what() << std::endl;
   }
+
+  return 0;
 }
diff --git a/sycl/test/basic_tests/event.cpp b/sycl/test/basic_tests/event.cpp
index af4f8b1bbaaf..1cb1bec86b8d 100644
--- a/sycl/test/basic_tests/event.cpp
+++ b/sycl/test/basic_tests/event.cpp
@@ -1,3 +1,7 @@
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 //==--------------- event.cpp - SYCL event test ----------------------------==//
@@ -7,6 +11,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 #include <iostream>
 
@@ -15,7 +20,8 @@ int main() {
     std::cout << "Create default event" << std::endl;
     cl::sycl::event e;
   } catch (cl::sycl::device_error e) {
-    std::cout << "Failed to create device for event" << std::endl;
+    std::cerr << "Failed to create device for event" << std::endl;
+    throw;
   }
   try {
     std::cout << "Try create OpenCL event" << std::endl;
@@ -28,11 +34,10 @@ int main() {
                 << ((cl_e.get() == u_e) ? " matches " : " does not match ")
                 << u_e << std::endl;
 
-    } else {
-      std::cout << "Failed to create OpenCL context" << std::endl;
     }
   } catch (cl::sycl::device_error e) {
-    std::cout << "Failed to create device for context" << std::endl;
+    std::cerr << "Failed to create device for context" << std::endl;
+    throw;
   }
 
   {
@@ -40,7 +45,7 @@ int main() {
     cl::sycl::event Event;
     size_t hash = std::hash<cl::sycl::event>()(Event);
     cl::sycl::event MovedEvent(std::move(Event));
-    assert(hash == std::hash<cl::sycl::event>()(MovedEvent));
+    CHECK(hash == std::hash<cl::sycl::event>()(MovedEvent));
   }
 
   {
@@ -49,7 +54,7 @@ int main() {
     size_t hash = std::hash<cl::sycl::event>()(Event);
     cl::sycl::event WillMovedEvent;
     WillMovedEvent = std::move(Event);
-    assert(hash == std::hash<cl::sycl::event>()(WillMovedEvent));
+    CHECK(hash == std::hash<cl::sycl::event>()(WillMovedEvent));
   }
 
   {
@@ -57,9 +62,9 @@ int main() {
     cl::sycl::event Event;
     size_t hash = std::hash<cl::sycl::event>()(Event);
     cl::sycl::event EventCopy(Event);
-    assert(hash == std::hash<cl::sycl::event>()(Event));
-    assert(hash == std::hash<cl::sycl::event>()(EventCopy));
-    assert(Event == EventCopy);
+    CHECK(hash == std::hash<cl::sycl::event>()(Event));
+    CHECK(hash == std::hash<cl::sycl::event>()(EventCopy));
+    CHECK(Event == EventCopy);
   }
 
   {
@@ -68,9 +73,9 @@ int main() {
     size_t hash = std::hash<cl::sycl::event>()(Event);
     cl::sycl::event WillEventCopy;
     WillEventCopy = Event;
-    assert(hash == std::hash<cl::sycl::event>()(Event));
-    assert(hash == std::hash<cl::sycl::event>()(WillEventCopy));
-    assert(Event == WillEventCopy);
+    CHECK(hash == std::hash<cl::sycl::event>()(Event));
+    CHECK(hash == std::hash<cl::sycl::event>()(WillEventCopy));
+    CHECK(Event == WillEventCopy);
   }
 
   // Check wait and wait_and_throw methods do not crash
@@ -106,4 +111,6 @@ int main() {
     }
     }
   }
+
+  return 0;
 }
diff --git a/sycl/test/basic_tests/event_async_exception.cpp b/sycl/test/basic_tests/event_async_exception.cpp
index 0dd803dc5d10..dd8a5ac5e7f3 100644
--- a/sycl/test/basic_tests/event_async_exception.cpp
+++ b/sycl/test/basic_tests/event_async_exception.cpp
@@ -33,7 +33,7 @@ int main() {
 
     e.wait_and_throw();
     return 1;
-  } catch (runtime_error e) {
+  } catch (runtime_error expectedException) {
     return 0;
   }
 }
diff --git a/sycl/test/basic_tests/image_accessor_readwrite.cpp b/sycl/test/basic_tests/image_accessor_readwrite.cpp
index 9ebaa14e5e55..4e051d31fb8b 100644
--- a/sycl/test/basic_tests/image_accessor_readwrite.cpp
+++ b/sycl/test/basic_tests/image_accessor_readwrite.cpp
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
-#include <cassert>
 #include <iomanip>
 #if DEBUG_OUTPUT
 #include <iostream>
@@ -35,7 +35,7 @@ void check_write_data(PixelDataType *HostDataPtr, PixelDataT ExpectedData) {
         (HostDataPtr[3] == (PixelDataType)ExpectedData.w())) {
       std::cout << "Data written is correct: " << std::endl;
     } else {
-      std::cout << "Data written is WRONG: " << std::endl;
+      std::cerr << "Data written is WRONG: " << std::endl;
     }
     std::cout << "HostDataPtr: \t" << (float)HostDataPtr[0] << "  "
               << (float)HostDataPtr[1] << "  " << (float)HostDataPtr[2] << "  "
@@ -46,10 +46,10 @@ void check_write_data(PixelDataType *HostDataPtr, PixelDataT ExpectedData) {
               << "  " << (float)ExpectedData.w() << std::endl;
   }
 #else
-  assert(HostDataPtr[0] == (PixelDataType)ExpectedData.x());
-  assert(HostDataPtr[1] == (PixelDataType)ExpectedData.y());
-  assert(HostDataPtr[2] == (PixelDataType)ExpectedData.z());
-  assert(HostDataPtr[3] == (PixelDataType)ExpectedData.w());
+  CHECK(HostDataPtr[0] == (PixelDataType)ExpectedData.x());
+  CHECK(HostDataPtr[1] == (PixelDataType)ExpectedData.y());
+  CHECK(HostDataPtr[2] == (PixelDataType)ExpectedData.z());
+  CHECK(HostDataPtr[3] == (PixelDataType)ExpectedData.w());
 #endif
 }
 
@@ -62,7 +62,7 @@ void check_write_data(s::cl_half *HostDataPtr, s::cl_half4 ExpectedData) {
         (HostDataPtr[3] == (float)ExpectedData.w())) {
       std::cout << "Data written is correct: " << std::endl;
     } else {
-      std::cout << "Data written is WRONG: " << std::endl;
+      std::cerr << "Data written is WRONG: " << std::endl;
     }
     std::cout << "HostDataPtr: \t" << (float)HostDataPtr[0] << "  "
               << (float)HostDataPtr[1] << "  " << (float)HostDataPtr[2] << "  "
@@ -73,10 +73,10 @@ void check_write_data(s::cl_half *HostDataPtr, s::cl_half4 ExpectedData) {
               << "  " << (float)ExpectedData.w() << std::endl;
   }
 #else
-  assert(HostDataPtr[0] == (float)ExpectedData.x());
-  assert(HostDataPtr[1] == (float)ExpectedData.y());
-  assert(HostDataPtr[2] == (float)ExpectedData.z());
-  assert(HostDataPtr[3] == (float)ExpectedData.w());
+  CHECK(HostDataPtr[0] == (float)ExpectedData.x());
+  CHECK(HostDataPtr[1] == (float)ExpectedData.y());
+  CHECK(HostDataPtr[2] == (float)ExpectedData.z());
+  CHECK(HostDataPtr[3] == (float)ExpectedData.w());
 #endif
 }
 
@@ -94,7 +94,7 @@ void check_read_data(ReadDataT ReadData, ReadDataT ExpectedColor) {
         ((ReadDataType)ReadData.w() == (ReadDataType)ExpectedColor.w())) {
       std::cout << "Read Data is correct: " << std::endl;
     } else {
-      std::cout << "Read Data is WRONG: " << std::endl;
+      std::cerr << "Read Data is WRONG: " << std::endl;
     }
     std::cout << "ReadData: \t"
               << std::setprecision(std::numeric_limits<long double>::digits10 +
@@ -114,10 +114,10 @@ void check_read_data(ReadDataT ReadData, ReadDataT ExpectedColor) {
   }
 #else
   {
-    assert((ReadDataType)ReadData.x() == (ReadDataType)ExpectedColor.x());
-    assert((ReadDataType)ReadData.y() == (ReadDataType)ExpectedColor.y());
-    assert((ReadDataType)ReadData.z() == (ReadDataType)ExpectedColor.z());
-    assert((ReadDataType)ReadData.w() == (ReadDataType)ExpectedColor.w());
+    CHECK((ReadDataType)ReadData.x() == (ReadDataType)ExpectedColor.x());
+    CHECK((ReadDataType)ReadData.y() == (ReadDataType)ExpectedColor.y());
+    CHECK((ReadDataType)ReadData.z() == (ReadDataType)ExpectedColor.z());
+    CHECK((ReadDataType)ReadData.w() == (ReadDataType)ExpectedColor.w());
   }
 #endif
 }
@@ -135,7 +135,7 @@ void check_read_data(s::cl_float4 ReadData, s::cl_float4 ExpectedColor) {
         ((s::cl_int)Diff.w() <= 1 && (s::cl_int)Diff.w() >= -1)) {
       std::cout << "Read Data is correct within precision: " << std::endl;
     } else {
-      std::cout << "Read Data is WRONG/ outside precision: " << std::endl;
+      std::cerr << "Read Data is WRONG/ outside precision: " << std::endl;
     }
     std::cout << "ReadData: \t"
               << std::setprecision(std::numeric_limits<long double>::digits10 +
@@ -153,10 +153,10 @@ void check_read_data(s::cl_float4 ReadData, s::cl_float4 ExpectedColor) {
   }
 #else
   {
-    assert((s::cl_int)Diff.x() <= 1 && (s::cl_int)Diff.x() >= -1);
-    assert((s::cl_int)Diff.y() <= 1 && (s::cl_int)Diff.y() >= -1);
-    assert((s::cl_int)Diff.z() <= 1 && (s::cl_int)Diff.z() >= -1);
-    assert((s::cl_int)Diff.w() <= 1 && (s::cl_int)Diff.w() >= -1);
+    CHECK((s::cl_int)Diff.x() <= 1 && (s::cl_int)Diff.x() >= -1);
+    CHECK((s::cl_int)Diff.y() <= 1 && (s::cl_int)Diff.y() >= -1);
+    CHECK((s::cl_int)Diff.z() <= 1 && (s::cl_int)Diff.z() >= -1);
+    CHECK((s::cl_int)Diff.w() <= 1 && (s::cl_int)Diff.w() >= -1);
   }
 #endif
 }
diff --git a/sycl/test/basic_tests/image_api.cpp b/sycl/test/basic_tests/image_api.cpp
index 4e7976311416..b8d4e517abee 100644
--- a/sycl/test/basic_tests/image_api.cpp
+++ b/sycl/test/basic_tests/image_api.cpp
@@ -1,3 +1,8 @@
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+// CUDA cannot support OpenCL spec conform images.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -I %sycl_source_dir %s -o %t1.out
 // RUN: %clangxx -I %sycl_source_dir %s -o %t3.out -lsycl
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
@@ -13,9 +18,9 @@
 #include <detail/kernel_impl.hpp>
 #include <detail/scheduler/scheduler.hpp>
 
+#include "../helpers.hpp"
 #include <algorithm>
 #include <array>
-#include <cassert>
 #include <iostream>
 #include <memory>
 #include <utility>
@@ -144,7 +149,8 @@ int main() {
     s::event EventRet = d::createSyclObjFromImpl<s::event>(Event);
     EventRet.wait();
   } catch (const s::exception &E) {
-    std::cout << "SYCL exception caught: " << E.what() << std::endl;
+    std::cerr << "SYCL exception caught: " << E.what() << std::endl;
+    throw;
   }
 
   s::float4 Expected{10.f, 20.f, 30.f, 40.f};
@@ -157,7 +163,7 @@ int main() {
   if (Result) {
     std::cout << "The result is correct." << std::endl;
   } else {
-    std::cout << "The result is incorrect." << std::endl;
+    std::cerr << "The result is incorrect." << std::endl;
     assert(Result);
   }
   return 0;
diff --git a/sycl/test/basic_tests/info.cpp b/sycl/test/basic_tests/info.cpp
index 761c7c52a5ca..89cb94c3722a 100644
--- a/sycl/test/basic_tests/info.cpp
+++ b/sycl/test/basic_tests/info.cpp
@@ -11,6 +11,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 #include <iostream>
 #include <string>
@@ -314,6 +315,7 @@ int main() {
   if (!dev.is_host()) {
     try {
       print_info<info::device::parent_device, device>(dev, "Parent device");
+      CHECK(!"Expected exception has not been caught");
     } catch (invalid_object_error e) {
       std::cout << "Expected exception has been caught: " << e.what()
                 << std::endl;
@@ -361,4 +363,6 @@ int main() {
   std::cout << separator << "Platform from context information\n" << separator;
   auto cplt = ctx.get_info<cl::sycl::info::context::platform>();
   print_info<info::platform::name, string_class>(cplt, "Name");
+
+  return 0;
 }
diff --git a/sycl/test/basic_tests/kernel_interop.cpp b/sycl/test/basic_tests/kernel_interop.cpp
index 5e24cd66d058..ea111161580a 100644
--- a/sycl/test/basic_tests/kernel_interop.cpp
+++ b/sycl/test/basic_tests/kernel_interop.cpp
@@ -1,10 +1,12 @@
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
-// REQUIRES: opencl
-
 //==--------------- kernel_interop.cpp - SYCL kernel ocl interop test ------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -12,10 +14,9 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 
-#include <cassert>
-
 using namespace cl::sycl;
 
 // This test checks that SYCL kernel interoperabitily constructor is implemented
@@ -55,10 +56,11 @@ int main() {
     context Context1 = Queue1.get_context();
     try {
       kernel Kernel(ClKernel, Context1);
-    } catch (cl::sycl::invalid_parameter_error e) {
-      Pass = true;
+      CHECK(!"Expected exception not caught");
+    } catch (cl::sycl::invalid_parameter_error ExpectedException) {
+      std::cout << "Expected exception caught " << ExpectedException.what()
+                << std::endl;
     }
-    assert(Pass);
 
     kernel Kernel(ClKernel, Context);
 
diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp
index 106cdb31419a..186d467ab7f7 100644
--- a/sycl/test/basic_tests/parallel_for_range.cpp
+++ b/sycl/test/basic_tests/parallel_for_range.cpp
@@ -1,11 +1,13 @@
-// RUN: %clangxx -fsycl %s -o %t.out
+// XFAIL: cuda
+// CUDA exposes broken hierarchical parallelism.
+
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple  %s -o %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// XFAIL: cuda
 
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
-
 #include <iostream>
 
 using namespace cl::sycl;
@@ -38,7 +40,7 @@ int main() {
     Q.wait_and_throw();
     std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been "
                  "thrown\n";
-    return 1; // We shouldn't be here, exception is expected
+    CHECK(!"Expected exception not caught");
   } catch (nd_range_error &E) {
     if (string_class(E.what()).find(
             "Specified local size doesn't match the required work-group size "
@@ -60,7 +62,7 @@ int main() {
   }
 
   string_class OCLVersionStr = D.get_info<info::device::version>();
-  assert(OCLVersionStr.size() >= 10 &&
+  CHECK(OCLVersionStr.size() >= 10 &&
          "Unexpected device version string"); // strlen("OpenCL X.Y")
   const char *OCLVersion = &OCLVersionStr[7]; // strlen("OpenCL ")
   if (OCLVersion[0] == '1' || (OCLVersion[0] == '2' && OCLVersion[2] == '0')) {
@@ -75,7 +77,7 @@ int main() {
       std::cerr
           << "Test case ReqdWGSizeNegativeB failed: no exception has been "
              "thrown\n";
-      return 1; // We shouldn't be here, exception is expected
+      CHECK(!"Expected exception not caught");
     } catch (nd_range_error &E) {
       if (string_class(E.what()).find(
               "OpenCL 1.x and 2.0 requires to pass local size argument even if "
@@ -145,7 +147,7 @@ int main() {
         std::cerr
             << "Test case OpenCL1XNegativeA failed: no exception has been "
                "thrown\n";
-        return 1; // We shouldn't be here, exception is expected
+        CHECK(!"Expected exception not caught");
       }
     } catch (nd_range_error &E) {
       if (string_class(E.what()).find("Non-uniform work-groups are not "
@@ -182,7 +184,7 @@ int main() {
         std::cerr
             << "Test case OpenCL1XNegativeB failed: no exception has been "
                "thrown\n";
-        return 1; // We shouldn't be here, exception is expected
+        CHECK(!"Expected exception not caught");
       }
     } catch (nd_range_error &E) {
       if (string_class(E.what()).find("Non-uniform work-groups are not "
@@ -220,7 +222,7 @@ int main() {
       Q.wait_and_throw();
       std::cerr << "Test case OpenCL1XNegativeC failed: no exception has been "
                    "thrown\n";
-      return 1; // We shouldn't be here, exception is expected
+      CHECK(!"Expected exception not caught");
     } catch (nd_range_error &E) {
       if (string_class(E.what()).find(
               "Total number of work-items in a work-group cannot exceed "
@@ -269,7 +271,7 @@ int main() {
         std::cerr
             << "Test case OpenCL2XNegativeA failed: no exception has been "
                "thrown\n";
-        return 1; // We shouldn't be here, exception is expected
+        CHECK(!"Expected exception not caught");
       } catch (nd_range_error &E) {
         if (string_class(E.what()).find(
                 "Total number of work-items in a work-group cannot exceed "
@@ -314,7 +316,7 @@ int main() {
           std::cerr
               << "Test case OpenCL2XNegativeB failed: no exception has been "
                  "thrown\n";
-          return 1; // We shouldn't be here, exception is expected
+          CHECK(!"Expected exception not caught");
         }
       } catch (nd_range_error &E) {
         if (string_class(E.what()).find(
@@ -354,7 +356,7 @@ int main() {
           std::cerr
               << "Test case OpenCL2XNegativeC failed: no exception has been "
                  "thrown\n";
-          return 1; // We shouldn't be here, exception is expected
+          CHECK(!"Expected exception not caught");
         }
       } catch (nd_range_error &E) {
         if (string_class(E.what()).find(
@@ -468,7 +470,7 @@ int main() {
           std::cerr
               << "Test case OpenCL2XNegativeD failed: no exception has been "
                  "thrown\n";
-          return 1; // We shouldn't be here, exception is expected
+          CHECK(!"Expected exception not caught");
         }
       } catch (nd_range_error &E) {
         if (string_class(E.what()).find(
@@ -515,7 +517,7 @@ int main() {
           std::cerr
               << "Test case OpenCL2XNegativeE failed: no exception has been "
                  "thrown\n";
-          return 1; // We shouldn't be here, exception is expected
+          CHECK(!"Expected exception not caught");
         }
       } catch (nd_range_error &E) {
         if (string_class(E.what()).find(
@@ -551,8 +553,10 @@ int main() {
     Q.wait_and_throw();
     std::cerr << "Test case NegativeA failed: no exception has been "
                  "thrown\n";
-    return 1; // We shouldn't be here, exception is expected
-  } catch (runtime_error) {
+    CHECK(!"Expected exception not caught");
+  } catch (runtime_error ExpectedException) {
+    std::cout << "Expected exception caught " << ExpectedException.what()
+              << std::endl;
   }
 
   // parallel_for_work_group with 0-based local range
@@ -564,8 +568,10 @@ int main() {
     Q.wait_and_throw();
     std::cerr << "Test case NegativeB failed: no exception has been "
                  "thrown\n";
-    return 1; // We shouldn't be here, exception is expected
-  } catch (runtime_error) {
+    CHECK(!"Expected exception not caught");
+  } catch (runtime_error ExpectedException) {
+    std::cout << "Expected exception caught " << ExpectedException.what()
+              << std::endl;
   }
 
   return 0;
diff --git a/sycl/test/basic_tests/parallel_for_range_host.cpp b/sycl/test/basic_tests/parallel_for_range_host.cpp
index 3fb3b24b44f6..2440b9938260 100644
--- a/sycl/test/basic_tests/parallel_for_range_host.cpp
+++ b/sycl/test/basic_tests/parallel_for_range_host.cpp
@@ -1,8 +1,8 @@
 // RUN: %clangxx -fsycl %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
-
 #include <iostream>
 
 using namespace cl::sycl;
@@ -25,7 +25,7 @@ int main() {
     Q.wait_and_throw();
     std::cerr << "Test case 'a' failed: no exception has been thrown"
               << std::endl;
-    return 1;
+    CHECK(!"Expected exception not caught");
   } catch (nd_range_error) {
     // We expect an error to be thrown!
   }
@@ -65,7 +65,7 @@ int main() {
     Q.wait_and_throw();
     std::cerr << "Test case 'd' failed: no exception has been thrown"
               << std::endl;
-    return 1;
+    CHECK(!"Expected exception not caught");
   } catch (nd_range_error) {
   }
 
@@ -78,7 +78,7 @@ int main() {
     Q.wait_and_throw();
     std::cerr << "Test case 'e' failed: no exception has been thrown"
               << std::endl;
-    return 1;
+    CHECK(!"Expected exception not caught");
   } catch (nd_range_error) {
   }
 
@@ -91,7 +91,7 @@ int main() {
     Q.wait_and_throw();
     std::cerr << "Test case 'f' failed: no exception has been thrown"
               << std::endl;
-    return 1;
+    CHECK(!"Expected exception not caught");
   } catch (nd_range_error) {
   }
 
diff --git a/sycl/test/basic_tests/queue.cpp b/sycl/test/basic_tests/queue.cpp
index 50ba658576ee..37dad9eb4488 100644
--- a/sycl/test/basic_tests/queue.cpp
+++ b/sycl/test/basic_tests/queue.cpp
@@ -8,6 +8,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 #include <iostream>
 
@@ -32,6 +33,7 @@ int main() {
 
   } catch (device_error e) {
     std::cout << "Failed to create device for context" << std::endl;
+    throw;
   }
 
   auto devices = device::get_devices();
@@ -42,10 +44,10 @@ int main() {
     queue Queue(deviceA);
     size_t hash = hash_class<queue>()(Queue);
     queue MovedQueue(std::move(Queue));
-    assert(hash == hash_class<queue>()(MovedQueue));
-    assert(deviceA.is_host() == MovedQueue.is_host());
+    CHECK(hash == hash_class<queue>()(MovedQueue));
+    CHECK(deviceA.is_host() == MovedQueue.is_host());
     if (!deviceA.is_host()) {
-      assert(MovedQueue.get() != nullptr);
+      CHECK(MovedQueue.get() != nullptr);
     }
   }
   {
@@ -54,10 +56,10 @@ int main() {
     size_t hash = hash_class<queue>()(Queue);
     queue WillMovedQueue(deviceB);
     WillMovedQueue = std::move(Queue);
-    assert(hash == hash_class<queue>()(WillMovedQueue));
-    assert(deviceA.is_host() == WillMovedQueue.is_host());
+    CHECK(hash == hash_class<queue>()(WillMovedQueue));
+    CHECK(deviceA.is_host() == WillMovedQueue.is_host());
     if (!deviceA.is_host()) {
-      assert(WillMovedQueue.get() != nullptr);
+      CHECK(WillMovedQueue.get() != nullptr);
     }
   }
   {
@@ -65,10 +67,10 @@ int main() {
     queue Queue(deviceA);
     size_t hash = hash_class<queue>()(Queue);
     queue QueueCopy(Queue);
-    assert(hash == hash_class<queue>()(Queue));
-    assert(hash == hash_class<queue>()(QueueCopy));
-    assert(Queue == QueueCopy);
-    assert(Queue.is_host() == QueueCopy.is_host());
+    CHECK(hash == hash_class<queue>()(Queue));
+    CHECK(hash == hash_class<queue>()(QueueCopy));
+    CHECK(Queue == QueueCopy);
+    CHECK(Queue.is_host() == QueueCopy.is_host());
   }
   {
     std::cout << "copy assignment operator" << std::endl;
@@ -76,10 +78,10 @@ int main() {
     size_t hash = hash_class<queue>()(Queue);
     queue WillQueueCopy(deviceB);
     WillQueueCopy = Queue;
-    assert(hash == hash_class<queue>()(Queue));
-    assert(hash == hash_class<queue>()(WillQueueCopy));
-    assert(Queue == WillQueueCopy);
-    assert(Queue.is_host() == WillQueueCopy.is_host());
+    CHECK(hash == hash_class<queue>()(Queue));
+    CHECK(hash == hash_class<queue>()(WillQueueCopy));
+    CHECK(Queue == WillQueueCopy);
+    CHECK(Queue.is_host() == WillQueueCopy.is_host());
   }
 
   {
@@ -88,7 +90,7 @@ int main() {
     try {
       Queue.throw_asynchronous();
     } catch (const std::bad_function_call &e) {
-      std::cout << "Default asynchronous handler call failed: " << e.what()
+      std::cerr << "Default asynchronous handler call failed: " << e.what()
                 << std::endl;
       throw;
     }
@@ -99,7 +101,7 @@ int main() {
     device Device = Selector.select_device();
     context Context(Device);
     queue Queue(Context, Selector);
-    assert(Context == Queue.get_context());
+    CHECK(Context == Queue.get_context());
   }
 
   {
diff --git a/sycl/test/basic_tests/subdevice.cpp b/sycl/test/basic_tests/subdevice.cpp
index bd4e237f8034..3997db5c436f 100644
--- a/sycl/test/basic_tests/subdevice.cpp
+++ b/sycl/test/basic_tests/subdevice.cpp
@@ -12,138 +12,135 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 #include <algorithm>
-#include <cassert>
 #include <iostream>
 #include <utility>
 
 using namespace cl::sycl;
 
 int main() {
-  try {
-    auto devices = device::get_devices();
-    for (const auto &dev : devices) {
-      // TODO: implement subdevices creation for host device
-      if (dev.is_host())
-        continue;
-
-      assert(dev.get_info<info::device::partition_type_property>() ==
-             info::partition_property::no_partition);
-
-      size_t MaxSubDevices =
-          dev.get_info<info::device::partition_max_sub_devices>();
-      if (MaxSubDevices == 0)
-        continue;
-
-      try {
-        auto SubDevicesEq =
-            dev.create_sub_devices<info::partition_property::partition_equally>(
-                1);
-        assert(SubDevicesEq.size() == MaxSubDevices &&
-               "Requested 1 compute unit in each subdevice, expected maximum "
-               "number of subdevices in output");
-        std::cout << "Created " << SubDevicesEq.size()
-                  << " subdevices using equal partition scheme" << std::endl;
-
-        assert(
-            SubDevicesEq[0].get_info<info::device::partition_type_property>() ==
+  // Not catching exceptions to make test fail instead.
+
+  auto devices = device::get_devices();
+  for (const auto &dev : devices) {
+    // TODO: implement subdevices creation for host device
+    if (dev.is_host())
+      continue;
+
+    CHECK(dev.get_info<info::device::partition_type_property>() ==
+          info::partition_property::no_partition);
+
+    size_t MaxSubDevices =
+        dev.get_info<info::device::partition_max_sub_devices>();
+    if (MaxSubDevices == 0)
+      continue;
+
+    try {
+      auto SubDevicesEq =
+          dev.create_sub_devices<info::partition_property::partition_equally>(
+              1);
+      CHECK(SubDevicesEq.size() == MaxSubDevices &&
+            "Requested 1 compute unit in each subdevice, expected maximum "
+            "number of subdevices in output");
+      std::cout << "Created " << SubDevicesEq.size()
+                << " subdevices using equal partition scheme" << std::endl;
+
+      CHECK(SubDevicesEq[0].get_info<info::device::partition_type_property>() ==
             info::partition_property::partition_equally);
 
-        assert(SubDevicesEq[0].get_info<info::device::parent_device>().get() ==
-               dev.get());
-      } catch (feature_not_supported) {
-        // okay skip it
-      }
-
-      try {
-        vector_class<size_t> Counts(MaxSubDevices, 1);
-        auto SubDevicesByCount = dev.create_sub_devices<
-            info::partition_property::partition_by_counts>(Counts);
-        assert(SubDevicesByCount.size() == MaxSubDevices &&
-               "Maximum number of subdevices was requested with 1 compute unit "
-               "on each");
-        std::cout << "Created " << SubDevicesByCount.size()
-                  << " subdevices using partition by counts scheme."
-                  << std::endl;
-        assert(SubDevicesByCount[0]
-                   .get_info<info::device::partition_type_property>() ==
-               info::partition_property::partition_by_counts);
-      } catch (feature_not_supported) {
-        // okay skip it
-      }
-
-      try {
-        auto SubDevicesDomainNuma = dev.create_sub_devices<
-            info::partition_property::partition_by_affinity_domain>(
-            info::partition_affinity_domain::numa);
-        std::cout
-            << "Created " << SubDevicesDomainNuma.size()
-            << " subdevices using partition by numa affinity domain scheme."
-            << std::endl;
-      } catch (feature_not_supported) {
-        // okay skip it
-      }
-
-      try {
-        auto SubDevicesDomainL4 = dev.create_sub_devices<
-            info::partition_property::partition_by_affinity_domain>(
-            info::partition_affinity_domain::L4_cache);
-        std::cout << "Created " << SubDevicesDomainL4.size()
-                  << " subdevices using partition by L4 cache domain scheme."
-                  << std::endl;
-      } catch (feature_not_supported) {
-        // okay skip it
-      }
-
-      try {
-        auto SubDevicesDomainL3 = dev.create_sub_devices<
-            info::partition_property::partition_by_affinity_domain>(
-            info::partition_affinity_domain::L3_cache);
-        std::cout << "Created " << SubDevicesDomainL3.size()
-                  << " subdevices using partition by L3 cache domain scheme."
-                  << std::endl;
-      } catch (feature_not_supported) {
-        // okay skip it
-      }
-
-      try {
-        auto SubDevicesDomainL2 = dev.create_sub_devices<
-            info::partition_property::partition_by_affinity_domain>(
-            info::partition_affinity_domain::L2_cache);
-        std::cout << "Created " << SubDevicesDomainL2.size()
-                  << " subdevices using partition by L2 cache domain scheme."
-                  << std::endl;
-      } catch (feature_not_supported) {
-        // okay skip it
-      }
-
-      try {
-        auto SubDevicesDomainL1 = dev.create_sub_devices<
-            info::partition_property::partition_by_affinity_domain>(
-            info::partition_affinity_domain::L1_cache);
-        std::cout << "Created " << SubDevicesDomainL1.size()
-                  << " subdevices using partition by L1 cache domain scheme."
-                  << std::endl;
-      } catch (feature_not_supported) {
-        // okay skip it
-      }
-
-      try {
-        auto SubDevicesDomainNextPart = dev.create_sub_devices<
-            info::partition_property::partition_by_affinity_domain>(
-            info::partition_affinity_domain::next_partitionable);
-        std::cout << "Created " << SubDevicesDomainNextPart.size()
-                  << " subdevices using partition by next partitionable "
-                     "domain scheme."
-                  << std::endl;
-      } catch (feature_not_supported) {
-        // okay skip it
-      }
+      CHECK(SubDevicesEq[0].get_info<info::device::parent_device>().get() ==
+            dev.get());
+    } catch (feature_not_supported) {
+      // okay skip it
+    }
+
+    try {
+      vector_class<size_t> Counts(MaxSubDevices, 1);
+      auto SubDevicesByCount = dev.create_sub_devices<
+          info::partition_property::partition_by_counts>(Counts);
+      CHECK(SubDevicesByCount.size() == MaxSubDevices &&
+            "Maximum number of subdevices was requested with 1 compute unit "
+            "on each");
+      std::cout << "Created " << SubDevicesByCount.size()
+                << " subdevices using partition by counts scheme."
+                << std::endl;
+      CHECK(SubDevicesByCount[0]
+                .get_info<info::device::partition_type_property>() ==
+            info::partition_property::partition_by_counts);
+    } catch (feature_not_supported) {
+      // okay skip it
+    }
+
+    try {
+      auto SubDevicesDomainNuma = dev.create_sub_devices<
+          info::partition_property::partition_by_affinity_domain>(
+          info::partition_affinity_domain::numa);
+      std::cout
+          << "Created " << SubDevicesDomainNuma.size()
+          << " subdevices using partition by numa affinity domain scheme."
+          << std::endl;
+    } catch (feature_not_supported) {
+      // okay skip it
+    }
+
+    try {
+      auto SubDevicesDomainL4 = dev.create_sub_devices<
+          info::partition_property::partition_by_affinity_domain>(
+          info::partition_affinity_domain::L4_cache);
+      std::cout << "Created " << SubDevicesDomainL4.size()
+                << " subdevices using partition by L4 cache domain scheme."
+                << std::endl;
+    } catch (feature_not_supported) {
+      // okay skip it
+    }
+
+    try {
+      auto SubDevicesDomainL3 = dev.create_sub_devices<
+          info::partition_property::partition_by_affinity_domain>(
+          info::partition_affinity_domain::L3_cache);
+      std::cout << "Created " << SubDevicesDomainL3.size()
+                << " subdevices using partition by L3 cache domain scheme."
+                << std::endl;
+    } catch (feature_not_supported) {
+      // okay skip it
+    }
+
+    try {
+      auto SubDevicesDomainL2 = dev.create_sub_devices<
+          info::partition_property::partition_by_affinity_domain>(
+          info::partition_affinity_domain::L2_cache);
+      std::cout << "Created " << SubDevicesDomainL2.size()
+                << " subdevices using partition by L2 cache domain scheme."
+                << std::endl;
+    } catch (feature_not_supported) {
+      // okay skip it
+    }
+
+    try {
+      auto SubDevicesDomainL1 = dev.create_sub_devices<
+          info::partition_property::partition_by_affinity_domain>(
+          info::partition_affinity_domain::L1_cache);
+      std::cout << "Created " << SubDevicesDomainL1.size()
+                << " subdevices using partition by L1 cache domain scheme."
+                << std::endl;
+    } catch (feature_not_supported) {
+      // okay skip it
+    }
+
+    try {
+      auto SubDevicesDomainNextPart = dev.create_sub_devices<
+          info::partition_property::partition_by_affinity_domain>(
+          info::partition_affinity_domain::next_partitionable);
+      std::cout << "Created " << SubDevicesDomainNextPart.size()
+                << " subdevices using partition by next partitionable "
+                   "domain scheme."
+                << std::endl;
+    } catch (feature_not_supported) {
+      // okay skip it
     }
-  } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what() << std::endl;
-    return 1;
   }
+
   return 0;
 }
diff --git a/sycl/test/built-ins/nan.cpp b/sycl/test/built-ins/nan.cpp
index de1d406c0369..898b4ec0d1ad 100644
--- a/sycl/test/built-ins/nan.cpp
+++ b/sycl/test/built-ins/nan.cpp
@@ -5,10 +5,9 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 // XFAIL: cuda
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 
-#include <cassert>
-
 namespace s = cl::sycl;
 using namespace std;
 
@@ -34,8 +33,8 @@ template <typename T, typename R> void check_nan(s::queue &Queue) {
     });
     Queue.wait_and_throw();
   }
-  assert(s::isnan(Data));
-  assert(s::all(s::isnan(VData)));
+  CHECK(s::isnan(Data));
+  CHECK(s::all(s::isnan(VData)));
 }
 
 int main() {
@@ -58,6 +57,7 @@ int main() {
         std::cerr << "Unknown async exception was caught." << std::endl;
       }
     }
+    throw "ERROR: Asynchronous exception(s)";
   });
 #ifdef HALF_IS_SUPPORTED
   if (Queue.get_device().has_extension("cl_khr_fp16"))
diff --git a/sycl/test/devicelib/assert-windows.cpp b/sycl/test/devicelib/assert-windows.cpp
index 1451431b4694..9d88e8ca1182 100644
--- a/sycl/test/devicelib/assert-windows.cpp
+++ b/sycl/test/devicelib/assert-windows.cpp
@@ -43,6 +43,7 @@ void simple_vadd(const std::array<T, N> &VA, const std::array<T, N> &VB,
         std::cerr << "Unknown async exception was caught." << std::endl;
       }
     }
+    throw "ERROR: Asynchronous exception(s)";
   });
 
   cl::sycl::range<1> numOfItems{N};
diff --git a/sycl/test/devicelib/assert.cpp b/sycl/test/devicelib/assert.cpp
index d0f18fe8cb54..5d4ff9cea098 100644
--- a/sycl/test/devicelib/assert.cpp
+++ b/sycl/test/devicelib/assert.cpp
@@ -120,6 +120,7 @@ void simple_vadd(const std::array<T, N> &VA, const std::array<T, N> &VB,
         std::cerr << "Unknown async exception was caught." << std::endl;
       }
     }
+    throw "ERROR: Asynchronous exception(s)";
   });
   device dev = deviceQueue.get_device();
   bool unsupported = true;
diff --git a/sycl/test/fpga_tests/fpga_pipes.cpp b/sycl/test/fpga_tests/fpga_pipes.cpp
index 8dc6dab9c4f4..1eba903d9972 100644
--- a/sycl/test/fpga_tests/fpga_pipes.cpp
+++ b/sycl/test/fpga_tests/fpga_pipes.cpp
@@ -3,7 +3,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==------------- fpga_pipes.cpp - SYCL FPGA pipes test --------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
diff --git a/sycl/test/fpga_tests/fpga_queue.cpp b/sycl/test/fpga_tests/fpga_queue.cpp
index f9f4a3a72b98..84561f8e1e32 100644
--- a/sycl/test/fpga_tests/fpga_queue.cpp
+++ b/sycl/test/fpga_tests/fpga_queue.cpp
@@ -1,9 +1,13 @@
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+//
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==------------- fpga_queue.cpp - SYCL FPGA queues test -------------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -11,6 +15,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 #include <iostream>
 #include <set>
@@ -22,16 +27,19 @@ const int maxNumQueues = 256;
 
 void GetCLQueue(event sycl_event, std::set<cl_command_queue>& cl_queues) {
   try {
-    cl_command_queue cl_queue;
-    cl_event cl_event = sycl_event.get();
-    cl_int error = clGetEventInfo(cl_event, CL_EVENT_COMMAND_QUEUE,
-                                  sizeof(cl_queue), &cl_queue, nullptr);
-    assert(CL_SUCCESS == error && "Failed to obtain queue from OpenCL event");
-
-    cl_queues.insert(cl_queue);
+    if (!sycl_event.is_host()) {
+      cl_command_queue cl_queue;
+      cl_event cl_event = sycl_event.get();
+      cl_int error = clGetEventInfo(cl_event, CL_EVENT_COMMAND_QUEUE,
+                                    sizeof(cl_queue), &cl_queue, nullptr);
+      CHECK(CL_SUCCESS == error && "Failed to obtain queue from OpenCL event");
+
+      cl_queues.insert(cl_queue);
+    }
   } catch (invalid_object_error e) {
-    std::cout << "Failed to get OpenCL queue from SYCL event: " << e.what()
+    std::cerr << "Failed to get OpenCL queue from SYCL event: " << e.what()
               << std::endl;
+    throw;
   }
 }
 
@@ -42,9 +50,10 @@ int getExpectedQueueNumber(cl_device_id device_id, int default_value) {
                                  sizeof(reportedProps),
                                  &reportedProps,
                                  NULL);
-   assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
+   CHECK(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
    return (reportedProps & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE)
-              ? 1 : default_value;
+              ? 1
+              : default_value;
 }
 
 int main() {
diff --git a/sycl/test/function-pointers/fp-as-kernel-arg.cpp b/sycl/test/function-pointers/fp-as-kernel-arg.cpp
index 5a5c350a71aa..6700454a76b6 100644
--- a/sycl/test/function-pointers/fp-as-kernel-arg.cpp
+++ b/sycl/test/function-pointers/fp-as-kernel-arg.cpp
@@ -5,7 +5,8 @@
 // FIXME: This test should use runtime early exit once correct check for
 // corresponding extension is implemented
 // UNSUPPORTED: windows
-// XFAIL: cuda
+// UNSUPPORTED: cuda
+// CUDA does not support the function pointer as kernel argument extension.
 
 #include <CL/sycl.hpp>
 
diff --git a/sycl/test/function-pointers/pass-fp-through-buffer.cpp b/sycl/test/function-pointers/pass-fp-through-buffer.cpp
index 744ff30caaa9..43fbb48868a6 100644
--- a/sycl/test/function-pointers/pass-fp-through-buffer.cpp
+++ b/sycl/test/function-pointers/pass-fp-through-buffer.cpp
@@ -5,7 +5,8 @@
 // FIXME: This test should use runtime early exit once correct check for
 // corresponding extension is implemented
 // UNSUPPORTED: windows
-// XFAIL: cuda
+// UNSUPPORTED: cuda
+// CUDA does not support the function pointer as kernel argument extension.
 
 #include <CL/sycl.hpp>
 
diff --git a/sycl/test/helpers.hpp b/sycl/test/helpers.hpp
index e5ca8f768fae..8f44d25db312 100644
--- a/sycl/test/helpers.hpp
+++ b/sycl/test/helpers.hpp
@@ -7,10 +7,20 @@
 //===----------------------------------------------------------------------===//
 
 #include <CL/sycl.hpp>
-
+#include <cstdlib>
 
 using namespace cl;
 
+void check(bool condition, const char *conditionString, const char *filename,
+           const long line) noexcept {
+  if (!condition) {
+    std::cerr << "CHECK failed in " << filename << "#" << line << " " << conditionString << "\n";
+    std::abort();
+  }
+}
+
+#define CHECK(CONDITION) check(CONDITION, #CONDITION, __FILE__, __LINE__)
+
 template <class VecT, int EndIdx = VecT::get_count(), int StartIdx = 0>
 class VecPrinter {
 public:
@@ -68,7 +78,7 @@ class TestQueue : public sycl::queue {
                           std::cerr << E.what() << std::endl;
                         }
                       }
-                      abort();
+                      std::abort();
                     },
                     PropList) {}
 
diff --git a/sycl/test/hier_par/hier_par_basic.cpp b/sycl/test/hier_par/hier_par_basic.cpp
index 6caf3169f555..d4a0c1de92ba 100644
--- a/sycl/test/hier_par/hier_par_basic.cpp
+++ b/sycl/test/hier_par/hier_par_basic.cpp
@@ -306,7 +306,7 @@ int main() {
       });
     }
   } catch (cl::sycl::exception const &e) {
-    std::cout << "SYCL exception caught: " << e.what() << '\n';
+    std::cerr << "SYCL exception caught: " << e.what() << '\n';
     return 2;
   }
 
@@ -314,6 +314,6 @@ int main() {
     std::cout << "Passed\n";
     return 0;
   }
-  std::cout << "FAILED\n";
+  std::cerr << "FAILED\n";
   return 1;
 }
diff --git a/sycl/test/hier_par/hier_par_wgscope.cpp b/sycl/test/hier_par/hier_par_wgscope.cpp
index ae346a178954..7dc373264441 100644
--- a/sycl/test/hier_par/hier_par_wgscope.cpp
+++ b/sycl/test/hier_par/hier_par_wgscope.cpp
@@ -18,8 +18,8 @@
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 
+// XFAIL: cuda
 // TODO: ptxas fatal   : Unresolved extern function '__spirv_ControlBarrier'
-// UNSUPPORTED: cuda
 
 // This test checks correctness of hierarchical kernel execution when there is
 // code and data in the work group scope.
@@ -117,7 +117,7 @@ static bool testWgScope(queue &Q) {
     });
     Q.wait();
   } catch (cl::sycl::exception const &E) {
-    std::cout << "SYCL exception caught: " << E.what() << '\n';
+    std::cerr << "SYCL exception caught: " << E.what() << '\n';
     return 2;
   }
   // verify
@@ -167,7 +167,7 @@ static bool testWgScope(queue &Q) {
     std::cout << "  Passed\n";
     return true;
   }
-  std::cout << "  Failed. Failure rate: " << ErrCnt << "/" << RangeLength << "("
+  std::cerr << "  Failed. Failure rate: " << ErrCnt << "/" << RangeLength << "("
             << ErrCnt / (float)RangeLength * 100.f << "%)\n";
   return false;
 }
@@ -260,13 +260,14 @@ int main() {
       try {
         std::rethrow_exception(ep);
       } catch (std::exception &E) {
-        std::cout << "*** std exception caught:\n";
-        std::cout << E.what();
+        std::cerr << "*** std exception caught:\n";
+        std::cerr << E.what();
       } catch (cl::sycl::exception const &E1) {
-        std::cout << "*** SYCL exception caught:\n";
-        std::cout << E1.what();
+        std::cerr << "*** SYCL exception caught:\n";
+        std::cerr << E1.what();
       }
     }
+    throw "ERROR: Asynchronous exception(s)";
   });
   std::cout << "Using device: "
             << Q.get_device().get_info<cl::sycl::info::device::name>() << "\n";
@@ -276,7 +277,7 @@ int main() {
   Passed &= testPrivateMemory(Q);
 
   if (!Passed) {
-    std::cout << "FAILED\n";
+    std::cerr << "FAILED\n";
     return 1;
   }
   std::cout << "Passed\n";
diff --git a/sycl/test/kernel_from_file/hw.cpp b/sycl/test/kernel_from_file/hw.cpp
index 9f9417ac1eaa..3238621fbb6f 100644
--- a/sycl/test/kernel_from_file/hw.cpp
+++ b/sycl/test/kernel_from_file/hw.cpp
@@ -6,8 +6,7 @@
 
 // TODO: InvalidTargetTriple: Expects spir-unknown-unknown or spir64-unknown-unknown. Actual target triple is x86_64-unknown-linux-gnu
 
-// XFAIL: cuda
-// Currently unsupported on cuda as this test specifically tests a SPV path.
+// UNSUPPORTED: cuda
 
 #include <CL/sycl.hpp>
 #include <iostream>
@@ -44,7 +43,7 @@ int main(int argc, char **argv) {
     std::cout << "Passed\n";
     return 0;
   } else {
-    std::cout << "Failed: " << data << "!= 6(gold)\n";
+    std::cerr << "Failed: " << data << "!= 6(gold)\n";
     return 1;
   }
 }
diff --git a/sycl/test/linear_id/linear-sub_group.cpp b/sycl/test/linear_id/linear-sub_group.cpp
index ba50b0e59277..2b3f75ae2182 100644
--- a/sycl/test/linear_id/linear-sub_group.cpp
+++ b/sycl/test/linear_id/linear-sub_group.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
diff --git a/sycl/test/linear_id/opencl-interop.cpp b/sycl/test/linear_id/opencl-interop.cpp
index 98df80f53137..f18a501b32f0 100644
--- a/sycl/test/linear_id/opencl-interop.cpp
+++ b/sycl/test/linear_id/opencl-interop.cpp
@@ -4,6 +4,8 @@
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
 // REQUIRES: opencl
 // UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+
 //==---------------- opencl-interop.cpp - SYCL linear id test --------------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
diff --git a/sycl/test/lit.cfg.py b/sycl/test/lit.cfg.py
index f4839e086efd..118f6b6a4889 100644
--- a/sycl/test/lit.cfg.py
+++ b/sycl/test/lit.cfg.py
@@ -107,13 +107,13 @@ def getDeviceCount(device_type):
         if len(result) > 1 and len(result[1]):
             print("getDeviceCount {TYPE}:{MSG}".format(
                 TYPE=device_type, MSG=result[1]))
-            if re.match(r".*cuda", result[1]):
+            if re.match(r".*cuda.*", result[1]):
                 is_cuda = True;
         if err:
             print("getDeviceCount {TYPE}:{ERR}".format(
                 TYPE=device_type, ERR=err))
         return [value,is_cuda]
-    return 0
+    return [0, False]
 
 # Every SYCL implementation provides a host implementation.
 config.available_features.add('host')
@@ -151,6 +151,8 @@ def getDeviceCount(device_type):
     config.available_features.add('gpu')
     if cuda:
        config.available_features.add('cuda')
+       gpu_run_substitute += " SYCL_BE=PI_CUDA "
+
 
     if platform.system() == "Linux":
         gpu_run_on_linux_substitute = "env SYCL_DEVICE_TYPE=GPU "
diff --git a/sycl/test/ordered_queue/ordered_queue.cpp b/sycl/test/ordered_queue/ordered_queue.cpp
index 442b8db35859..8269db5881e8 100644
--- a/sycl/test/ordered_queue/ordered_queue.cpp
+++ b/sycl/test/ordered_queue/ordered_queue.cpp
@@ -1,3 +1,7 @@
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+//
 // RUN: %clangxx -fsycl %s -o %t.out -L %opencl_libs_dir -lOpenCL
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 //==---------- ordered_queue.cpp - SYCL ordered queue test -----------------==//
@@ -7,6 +11,7 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 #include <iostream>
 
@@ -27,20 +32,21 @@ void print_queue_info(const ordered_queue &q) {
             << std::endl;
 }
 int main() {
-  try {
+  {
     std::cout << "Create default queue." << std::endl;
     ordered_queue q;
     print_queue_info(q);
-    cl_command_queue_properties reportedProps;
-    cl_int iRet =
-        clGetCommandQueueInfo(q.get(), CL_QUEUE_PROPERTIES,
-                              sizeof(reportedProps), &reportedProps, NULL);
-    assert(CL_SUCCESS == iRet && "Failed to obtain queue info from ocl device");
-    std::cout << "Queue properties bits are " << reportedProps
-              << " and OOO bit is " << CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
-              << std::endl;
-  } catch (device_error e) {
-    std::cout << "Failed to create device for context" << std::endl;
+    if (!q.is_host()) {
+      cl_command_queue_properties reportedProps;
+      cl_int iRet =
+          clGetCommandQueueInfo(q.get(), CL_QUEUE_PROPERTIES,
+                                sizeof(reportedProps), &reportedProps, NULL);
+      CHECK(CL_SUCCESS == iRet &&
+            "Failed to obtain queue info from ocl device");
+      std::cout << "Queue properties bits are " << reportedProps
+                << " and OOO bit is " << CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
+                << std::endl;
+    }
   }
 
   auto Devices = device::get_devices();
@@ -51,10 +57,10 @@ int main() {
     ordered_queue Queue(DeviceA);
     size_t Hash = hash_class<ordered_queue>()(Queue);
     ordered_queue MovedQueue(std::move(Queue));
-    assert(Hash == hash_class<ordered_queue>()(MovedQueue));
-    assert(DeviceA.is_host() == MovedQueue.is_host());
+    CHECK(Hash == hash_class<ordered_queue>()(MovedQueue));
+    CHECK(DeviceA.is_host() == MovedQueue.is_host());
     if (!DeviceA.is_host()) {
-      assert(MovedQueue.get() != nullptr);
+      CHECK(MovedQueue.get() != nullptr);
     }
   }
   {
@@ -63,10 +69,10 @@ int main() {
     size_t Hash = hash_class<ordered_queue>()(Queue);
     ordered_queue WillMovedQueue(DeviceB);
     WillMovedQueue = std::move(Queue);
-    assert(Hash == hash_class<ordered_queue>()(WillMovedQueue));
-    assert(DeviceA.is_host() == WillMovedQueue.is_host());
+    CHECK(Hash == hash_class<ordered_queue>()(WillMovedQueue));
+    CHECK(DeviceA.is_host() == WillMovedQueue.is_host());
     if (!DeviceA.is_host()) {
-      assert(WillMovedQueue.get() != nullptr);
+      CHECK(WillMovedQueue.get() != nullptr);
     }
   }
   {
@@ -74,10 +80,10 @@ int main() {
     ordered_queue Queue(DeviceA);
     size_t Hash = hash_class<ordered_queue>()(Queue);
     ordered_queue QueueCopy(Queue);
-    assert(Hash == hash_class<ordered_queue>()(Queue));
-    assert(Hash == hash_class<ordered_queue>()(QueueCopy));
-    assert(Queue == QueueCopy);
-    assert(Queue.is_host() == QueueCopy.is_host());
+    CHECK(Hash == hash_class<ordered_queue>()(Queue));
+    CHECK(Hash == hash_class<ordered_queue>()(QueueCopy));
+    CHECK(Queue == QueueCopy);
+    CHECK(Queue.is_host() == QueueCopy.is_host());
   }
   {
     std::cout << "copy assignment operator" << std::endl;
@@ -85,10 +91,10 @@ int main() {
     size_t Hash = hash_class<ordered_queue>()(Queue);
     ordered_queue WillQueueCopy(DeviceB);
     WillQueueCopy = Queue;
-    assert(Hash == hash_class<ordered_queue>()(Queue));
-    assert(Hash == hash_class<ordered_queue>()(WillQueueCopy));
-    assert(Queue == WillQueueCopy);
-    assert(Queue.is_host() == WillQueueCopy.is_host());
+    CHECK(Hash == hash_class<ordered_queue>()(Queue));
+    CHECK(Hash == hash_class<ordered_queue>()(WillQueueCopy));
+    CHECK(Queue == WillQueueCopy);
+    CHECK(Queue.is_host() == WillQueueCopy.is_host());
   }
 
   {
@@ -97,7 +103,7 @@ int main() {
     try {
       Queue.throw_asynchronous();
     } catch (const std::bad_function_call &e) {
-      std::cout << "Default asynchronous handler call failed: " << e.what()
+      std::cerr << "Default asynchronous handler call failed: " << e.what()
                 << std::endl;
       throw;
     }
@@ -108,6 +114,6 @@ int main() {
     device Device = Selector.select_device();
     context Context(Device);
     ordered_queue Queue(Context, Selector);
-    assert(Context == Queue.get_context());
+    CHECK(Context == Queue.get_context());
   }
 }
diff --git a/sycl/test/ordered_queue/prop.cpp b/sycl/test/ordered_queue/prop.cpp
index 733a121067e0..25d56335520b 100644
--- a/sycl/test/ordered_queue/prop.cpp
+++ b/sycl/test/ordered_queue/prop.cpp
@@ -1,4 +1,8 @@
-// RUN: %clangxx -fsycl %s -o %t1.out -L %opencl_libs_dir -lOpenCL
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out -L %opencl_libs_dir -lOpenCL
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
 
diff --git a/sycl/test/program_manager/env_vars.cpp b/sycl/test/program_manager/env_vars.cpp
index e747eab855e1..39e13d22d66c 100644
--- a/sycl/test/program_manager/env_vars.cpp
+++ b/sycl/test/program_manager/env_vars.cpp
@@ -1,4 +1,9 @@
-// RUN: %clangxx -O0 -fsycl %s -o %t.out -lsycl
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support online compilation/linking.
+// CUDA does not support OpenCL linker options.
+//
+// RUN: %clangxx -O0 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -lsycl
 //
 // Deprecated SYCL_PROGRAM_BUILD_OPTIONS should work as an alias to
 // SYCL_PROGRAM_COMPILE_OPTIONS:
diff --git a/sycl/test/regression/copy-with-unnamed-lambda.cpp b/sycl/test/regression/copy-with-unnamed-lambda.cpp
index 07f427eb6ff0..4ccc7b2b9ba8 100644
--- a/sycl/test/regression/copy-with-unnamed-lambda.cpp
+++ b/sycl/test/regression/copy-with-unnamed-lambda.cpp
@@ -2,7 +2,6 @@
 // The purpose of this test is to check that the following code can be
 // successfully compiled
 #include <CL/sycl.hpp>
-
 #include <iostream>
 
 int main() {
@@ -14,6 +13,7 @@ int main() {
         std::cerr << "Caught async SYCL exception: " << E.what() << std::endl;
       }
     }
+    throw "ERROR: Asynchronous exception(s)";
   };
 
   cl::sycl::queue Q(AsyncHandler);
diff --git a/sycl/test/regression/fp16-with-unnamed-lambda.cpp b/sycl/test/regression/fp16-with-unnamed-lambda.cpp
index ead806dcce68..0ce336b1de39 100644
--- a/sycl/test/regression/fp16-with-unnamed-lambda.cpp
+++ b/sycl/test/regression/fp16-with-unnamed-lambda.cpp
@@ -1,7 +1,7 @@
 // RUN: %clangxx -fsycl -fsycl-unnamed-lambda %s -o %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 #include <CL/sycl.hpp>
-
+#include <cstdlib>
 #include <iostream>
 
 int main() {
@@ -13,6 +13,7 @@ int main() {
         std::cerr << "Caught async SYCL exception: " << E.what() << std::endl;
       }
     }
+    std::abort();
   };
 
   cl::sycl::queue Q(AsyncHandler);
diff --git a/sycl/test/regression/group.cpp b/sycl/test/regression/group.cpp
index 264283181b79..d233b70d8cca 100644
--- a/sycl/test/regression/group.cpp
+++ b/sycl/test/regression/group.cpp
@@ -54,7 +54,7 @@ bool group__get_group_range() {
           });
     });
   } catch (cl::sycl::exception const &E) {
-    std::cout << "SYCL exception caught: " << E.what() << '\n';
+    std::cerr << "SYCL exception caught: " << E.what() << '\n';
     return 2;
   }
   const size_t SIZE_Z = GlobalRange.get(0);
@@ -120,7 +120,7 @@ bool group__get_linear_id() {
           });
     });
   } catch (cl::sycl::exception const &E) {
-    std::cout << "SYCL exception caught: " << E.what() << '\n';
+    std::cerr << "SYCL exception caught: " << E.what() << '\n';
     return 2;
   }
   const size_t SIZE_Z = GlobalRange.get(0);
@@ -150,9 +150,9 @@ bool group__get_linear_id() {
         Pass &= Ok;
 
         if (!Ok && ErrCnt++ < 10) {
-          std::cout << "*** ERROR at [" << Z << "][" << Y << "][" << X << "]: ";
-          std::cout << XTest << " " << YTest << " " << ZTest << " != ";
-          std::cout << XGold << " " << YGold << " " << ZGold << "\n";
+          std::cerr << "*** ERROR at [" << Z << "][" << Y << "][" << X << "]: ";
+          std::cerr << XTest << " " << YTest << " " << ZTest << " != ";
+          std::cerr << XGold << " " << YGold << " " << ZGold << "\n";
         }
       }
     }
@@ -168,7 +168,7 @@ int main() {
   Pass &= group__get_linear_id();
 
   if (!Pass) {
-    std::cout << "FAILED\n";
+    std::cerr << "FAILED\n";
     return 1;
   }
   std::cout << "Passed\n";
diff --git a/sycl/test/regression/image_access.cpp b/sycl/test/regression/image_access.cpp
index 9c11b787c78f..c4b7ca07893d 100644
--- a/sycl/test/regression/image_access.cpp
+++ b/sycl/test/regression/image_access.cpp
@@ -5,11 +5,8 @@
 // TODO: For now PI checks are skipped for ACC device. To decide if it's good.
 // RUN: env %ACC_RUN_PLACEHOLDER %t.out
 
-// TODO: No CUDA image support
-// XFAIL: cuda
-
-// TODO: No CUDA image support
-// XFAIL: cuda
+// UNSUPPORTED: cuda
+// CUDA cannot support OpenCL spec conform images.
 
 //==-------------- image_access.cpp - SYCL image accessors test  -----------==//
 //
@@ -22,29 +19,27 @@
 #include <CL/sycl.hpp>
 
 int main() {
-  try {
-    cl::sycl::range<1> Range(32);
-    std::vector<cl_float> Data(Range.size() * 4, 0.0f);
-    cl::sycl::image<1> Image(Data.data(), cl::sycl::image_channel_order::rgba,
-                             cl::sycl::image_channel_type::fp32, Range);
-    cl::sycl::queue Queue;
-
-    Queue.submit([&](cl::sycl::handler &CGH) {
-      cl::sycl::accessor<cl::sycl::cl_int4, 1, cl::sycl::access::mode::read,
-                         cl::sycl::access::target::image,
-                         cl::sycl::access::placeholder::false_t>
-          A(Image, CGH);
-      CGH.single_task<class MyKernel>([=]() {});
-    });
-    Queue.wait_and_throw();
-
+  // Not catching exceptions to make test fail instead.
+  cl::sycl::range<1> Range(32);
+  std::vector<cl_float> Data(Range.size() * 4, 0.0f);
+  cl::sycl::image<1> Image(Data.data(), cl::sycl::image_channel_order::rgba,
+                           cl::sycl::image_channel_type::fp32, Range);
+  cl::sycl::queue Queue;
+
+  Queue.submit([&](cl::sycl::handler &CGH) {
     cl::sycl::accessor<cl::sycl::cl_int4, 1, cl::sycl::access::mode::read,
-                       cl::sycl::access::target::host_image,
+                       cl::sycl::access::target::image,
                        cl::sycl::access::placeholder::false_t>
-        A(Image);
-  } catch (cl::sycl::exception &E) {
-    std::cout << E.what();
-  }
+        A(Image, CGH);
+    CGH.single_task<class MyKernel>([=]() {});
+  });
+  Queue.wait_and_throw();
+
+  cl::sycl::accessor<cl::sycl::cl_int4, 1, cl::sycl::access::mode::read,
+                     cl::sycl::access::target::host_image,
+                     cl::sycl::access::placeholder::false_t>
+      A(Image);
+
   return 0;
 }
 
diff --git a/sycl/test/scheduler/BasicSchedulerTests.cpp b/sycl/test/scheduler/BasicSchedulerTests.cpp
index 1db052997811..d73670565410 100644
--- a/sycl/test/scheduler/BasicSchedulerTests.cpp
+++ b/sycl/test/scheduler/BasicSchedulerTests.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include <CL/sycl.hpp>
-
 #include <iostream>
 
 using namespace cl;
@@ -36,6 +35,7 @@ template <class TestFuncT> void runTest(TestFuncT TestFunc) {
         std::cerr << "Unknown async exception was caught." << std::endl;
       }
     }
+    throw "ERROR: Asynchronous exception(s)";
   });
 
   TestFunc(Queue);
diff --git a/sycl/test/scheduler/MultipleDevices.cpp b/sycl/test/scheduler/MultipleDevices.cpp
index d27923929871..03b2dae48c97 100644
--- a/sycl/test/scheduler/MultipleDevices.cpp
+++ b/sycl/test/scheduler/MultipleDevices.cpp
@@ -83,7 +83,7 @@ int multidevice_test(queue MyQueue1, queue MyQueue2) {
     int Expected = D;
 
     if (FinalD[i] != D) {
-      std::cout << "Wrong value for element " << i
+      std::cerr << "Wrong value for element " << i
                 << " Expected: " << Expected << " Got: " << FinalD[i]
                 << std::endl;
       return -1;
diff --git a/sycl/test/sub_group/attributes.cpp b/sycl/test/sub_group/attributes.cpp
index e032de24f30d..45c444ec4716 100644
--- a/sycl/test/sub_group/attributes.cpp
+++ b/sycl/test/sub_group/attributes.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUNx: %GPU_RUN_PLACEHOLDER %t.out
@@ -112,7 +112,7 @@ int main() {
       exit_if_not_equal<size_t>(Res, ReqdSize, "compile_sub_group_size");
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     return 1;
   }
 
diff --git a/sycl/test/sub_group/barrier.cpp b/sycl/test/sub_group/barrier.cpp
index b31311179eed..996d7053dc82 100644
--- a/sycl/test/sub_group/barrier.cpp
+++ b/sycl/test/sub_group/barrier.cpp
@@ -3,7 +3,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==---------- barrier.cpp - SYCL sub_group barrier test -------*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -64,7 +64,7 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
       exit_if_not_equal<T>(addacc[j], add, "barrier");
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
diff --git a/sycl/test/sub_group/broadcast.cpp b/sycl/test/sub_group/broadcast.cpp
index 41e73b22fc8a..6e7eb353bfd0 100644
--- a/sycl/test/sub_group/broadcast.cpp
+++ b/sycl/test/sub_group/broadcast.cpp
@@ -1,10 +1,14 @@
+// XFAIL: cuda
+// CUDA compilation and runtime do not yet support sub groups.
+
+
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -D SG_GPU %s -o %t_gpu.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==--------- broadcast.cpp - SYCL sub_group broadcast test ----*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -53,7 +57,7 @@ template <typename T> void check(queue &Queue) {
                            "broadcasted value");
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
diff --git a/sycl/test/sub_group/common.cpp b/sycl/test/sub_group/common.cpp
index 530a3049d740..66584e54228b 100644
--- a/sycl/test/sub_group/common.cpp
+++ b/sycl/test/sub_group/common.cpp
@@ -3,7 +3,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==-------------- common.cpp - SYCL sub_group common test -----*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -70,7 +70,7 @@ void check(queue &Queue, unsigned int G, unsigned int L) {
                         "uniform_group_range");
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
diff --git a/sycl/test/sub_group/common_ocl.cpp b/sycl/test/sub_group/common_ocl.cpp
index 9a8e4afe7cd3..8eed81638283 100644
--- a/sycl/test/sub_group/common_ocl.cpp
+++ b/sycl/test/sub_group/common_ocl.cpp
@@ -1,3 +1,8 @@
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+// CUDA compilation and runtime do not yet support sub groups.
+
 // RUN: %clang_cc1 -x cl -cl-std=CL2.0 %S/sg.cl -triple spir64-unknown-unknown -emit-llvm-bc -o %T/kernel_ocl.bc -include opencl-c.h
 // RUN: llvm-spirv %T/kernel_ocl.bc -o %T/kernel_ocl.spv
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -L %opencl_libs_dir -lOpenCL
@@ -5,7 +10,7 @@
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %T/kernel_ocl.spv
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %T/kernel_ocl.spv
 // RUN: %ACC_RUN_PLACEHOLDER %t.out %T/kernel_ocl.spv
-// UNSUPPORTED: cuda
+
 //==--- common_ocl.cpp - basic SG methods in SYCL vs OpenCL  ---*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -93,7 +98,7 @@ void check(queue &Queue, const int G, const int L, const char *SpvFile) {
                         oclacc[j].uniform_group_range, "uniform_group_range");
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
diff --git a/sycl/test/sub_group/info.cpp b/sycl/test/sub_group/info.cpp
index 9bbe571aa75e..42323eed40b9 100644
--- a/sycl/test/sub_group/info.cpp
+++ b/sycl/test/sub_group/info.cpp
@@ -1,9 +1,14 @@
+// REQUIRES: opencl
+// UNSUPPORTED: cuda
+// CUDA does not support OpenCL interop.
+// CUDA compilation and runtime do not yet support sub groups.
+
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==------------- info.cpp - SYCL sub_group parameters test ----*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -109,7 +114,7 @@ int main() {
       }
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     return 1;
   }
 
diff --git a/sycl/test/sub_group/load_store.cpp b/sycl/test/sub_group/load_store.cpp
index 7f9b105ba272..e01eb70bea9e 100644
--- a/sycl/test/sub_group/load_store.cpp
+++ b/sycl/test/sub_group/load_store.cpp
@@ -1,9 +1,11 @@
+// XFAIL: cuda
+// CUDA compilation and runtime do not yet support sub groups.
+
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 //
 //==----------- load_store.cpp - SYCL sub_group load/store test ------------==//
 //
@@ -81,7 +83,7 @@ template <typename T, int N> void check(queue &Queue) {
       }
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
@@ -134,7 +136,7 @@ template <typename T> void check(queue &Queue) {
     }
 
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
diff --git a/sycl/test/sub_group/reduce.cpp b/sycl/test/sub_group/reduce.cpp
index 24d97cc27626..87f7aafc22aa 100644
--- a/sycl/test/sub_group/reduce.cpp
+++ b/sycl/test/sub_group/reduce.cpp
@@ -1,11 +1,14 @@
+// XFAIL: cuda
+// CUDA compilation and runtime do not yet support sub groups.
+
 //-fsycl-targets=%sycl_triple
 // RUN: %clangxx -fsycl -std=c++14 %s -o %t.out
-// RUN: %clangxx -fsycl -std=c++14 -D SG_GPU %s -o %t_gpu.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -std=c++14 -D SG_GPU %s -o %t_gpu.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==--------------- reduce.cpp - SYCL sub_group reduce test ----*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -62,7 +65,7 @@ void check_op(queue &Queue, T init, BinaryOperation op, bool skip_init = false,
       exit_if_not_equal<T>(acc[j], result, name.c_str());
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
diff --git a/sycl/test/sub_group/scan.cpp b/sycl/test/sub_group/scan.cpp
index bd3a65323212..881293b0325c 100644
--- a/sycl/test/sub_group/scan.cpp
+++ b/sycl/test/sub_group/scan.cpp
@@ -1,11 +1,14 @@
+// XFAIL: cuda
+// CUDA compilation and runtime do not yet support sub groups.
+
 //-fsycl-targets=%sycl_triple
 // RUN: %clangxx -fsycl -std=c++14 %s -o %t.out
-// RUN: %clangxx -fsycl -std=c++14 -D SG_GPU %s -o %t_gpu.out
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -std=c++14 -D SG_GPU %s -o %t_gpu.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t_gpu.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==--------------- scan.cpp - SYCL sub_group scan test --------*- C++ -*---==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
diff --git a/sycl/test/sub_group/shuffle.cpp b/sycl/test/sub_group/shuffle.cpp
index df1818ed77ef..cf971ce648e1 100644
--- a/sycl/test/sub_group/shuffle.cpp
+++ b/sycl/test/sub_group/shuffle.cpp
@@ -1,9 +1,11 @@
+// XFAIL: cuda
+// CUDA compilation and runtime do not yet support sub groups.
+
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUNx: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
 //
 //==------------ shuffle.cpp - SYCL sub_group shuffle test -----*- C++ -*---==//
 //
@@ -125,7 +127,7 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
       exit_if_not_equal_vec(acc_xor[j], vec<T, N>(j ^ SGid), "shuffle_xor");
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
@@ -224,7 +226,7 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
       exit_if_not_equal<T>(acc_xor[j], j ^ SGid, "shuffle_xor");
     }
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
diff --git a/sycl/test/sub_group/vote.cpp b/sycl/test/sub_group/vote.cpp
index 16d0059d86f4..945d717e0537 100644
--- a/sycl/test/sub_group/vote.cpp
+++ b/sycl/test/sub_group/vote.cpp
@@ -1,9 +1,12 @@
+// XFAIL: cuda
+// CUDA compilation and runtime do not yet support sub groups.
+
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
 // RUN: %ACC_RUN_PLACEHOLDER %t.out
-// UNSUPPORTED: cuda
+
 //==--------------- vote.cpp - SYCL sub_group vote test --*- C++ -*---------==//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -69,7 +72,7 @@ void check(queue Queue, const int G, const int L, const int D, const int R) {
     }
 
   } catch (exception e) {
-    std::cout << "SYCL exception caught: " << e.what();
+    std::cerr << "SYCL exception caught: " << e.what();
     exit(1);
   }
 }
diff --git a/sycl/test/usm/allocator_vector_fail.cpp b/sycl/test/usm/allocator_vector_fail.cpp
index f77729f14b6d..8a1375eed77d 100644
--- a/sycl/test/usm/allocator_vector_fail.cpp
+++ b/sycl/test/usm/allocator_vector_fail.cpp
@@ -1,8 +1,10 @@
+// XFAIL: cuda
+// CUDA does not support USM.
+
 // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// XFAIL: cuda
 
 //==-- allocator_vector_fail.cpp - Device Memory Allocator fail test -------==//
 //
diff --git a/sycl/test/usm/badmalloc.cpp b/sycl/test/usm/badmalloc.cpp
index b99f1f50663c..44f6707efd72 100644
--- a/sycl/test/usm/badmalloc.cpp
+++ b/sycl/test/usm/badmalloc.cpp
@@ -5,6 +5,7 @@
 
 // UNSUPPORTED: windows
 // XFAIL: cuda
+// CUDA does not support USM.
 
 //==----------------- badmalloc.cpp - Bad Mallocs test ---------------------==//
 //
diff --git a/sycl/test/usm/memcpy.cpp b/sycl/test/usm/memcpy.cpp
index e5871374ea3c..e519b1e63752 100644
--- a/sycl/test/usm/memcpy.cpp
+++ b/sycl/test/usm/memcpy.cpp
@@ -5,11 +5,14 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+// XFAIL: cuda
+// CUDA does not support USM.
+//
 // RUN: %clangxx -fsycl %s -o %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// XFAIL: cuda
 
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 
 using namespace cl::sycl;
@@ -42,7 +45,7 @@ int main() {
     q.wait_and_throw();
 
     for (int i = 0; i < count; i++) {
-      assert(dest[i] == i * 2);
+      CHECK(dest[i] == i * 2);
     }
 
     try {
@@ -51,7 +54,7 @@ int main() {
         cgh.memcpy(nullptr, src, sizeof(float) * count);
       });
       q.wait_and_throw();
-      assert(false && "Expected error from copying to nullptr");
+      CHECK(false && "Expected error from copying to nullptr");
     } catch (runtime_error e) {
     }
   }
diff --git a/sycl/test/usm/memset.cpp b/sycl/test/usm/memset.cpp
index 4e01415073f6..c0bdd97e6f2f 100644
--- a/sycl/test/usm/memset.cpp
+++ b/sycl/test/usm/memset.cpp
@@ -5,11 +5,14 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+// XFAIL: cuda
+// CUDA does not support USM.
+//
 // RUN: %clangxx -fsycl %s -o %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
-// XFAIL: cuda
 
+#include "../helpers.hpp"
 #include <CL/sycl.hpp>
 
 using namespace cl::sycl;
@@ -38,7 +41,7 @@ int main() {
     q.wait_and_throw();
 
     for (int i = 0; i < count; i++) {
-      assert(src[i] == 0x2a2a2a2a);
+      CHECK(src[i] == 0x2a2a2a2a);
     }
 
     try {
@@ -47,7 +50,7 @@ int main() {
         cgh.memset(nullptr, 0, sizeof(uint32_t) * count);
       });
       q.wait_and_throw();
-      assert(false && "Expected error from writing to nullptr");
+      CHECK(false && "Expected error from writing to nullptr");
     } catch (runtime_error e) {
     }
   }
diff --git a/sycl/test/usm/mixed2template.cpp b/sycl/test/usm/mixed2template.cpp
index 4261187092d7..12ddc999b681 100644
--- a/sycl/test/usm/mixed2template.cpp
+++ b/sycl/test/usm/mixed2template.cpp
@@ -1,4 +1,7 @@
-// RUN: %clangxx -fsycl %s -o %t1.out
+// XFAIL: cuda
+// CUDA does not support USM.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t1.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t1.out
 // RUN: %CPU_RUN_PLACEHOLDER %t1.out
 // RUN: %GPU_RUN_PLACEHOLDER %t1.out
diff --git a/sycl/test/usm/queue_wait.cpp b/sycl/test/usm/queue_wait.cpp
index 76bdaaf4c7b9..1f5b33b7fe97 100644
--- a/sycl/test/usm/queue_wait.cpp
+++ b/sycl/test/usm/queue_wait.cpp
@@ -1,4 +1,7 @@
-// RUN: %clangxx -fsycl %s -o %t.out
+// XFAIL: cuda
+// CUDA does not support USM.
+//
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
 // RUN: env SYCL_DEVICE_TYPE=HOST %t.out
 // RUN: %CPU_RUN_PLACEHOLDER %t.out
 // RUN: %GPU_RUN_PLACEHOLDER %t.out
diff --git a/sycl/test/warnings/warnings.cpp b/sycl/test/warnings/warnings.cpp
index aa737dd19b12..ea2409f828f8 100644
--- a/sycl/test/warnings/warnings.cpp
+++ b/sycl/test/warnings/warnings.cpp
@@ -1,4 +1,4 @@
-// RUN: %clangxx -Wall -Wpessimizing-move -Wunused-variable -Wmismatched-tags -Wunneeded-internal-declaration -Werror -fsycl %s -o %t.out
+// RUN: %clangxx -Wall -Wpessimizing-move -Wunused-variable -Wmismatched-tags -Wunneeded-internal-declaration -Werror -Wno-unknown-cuda-version -fsycl %s -o %t.out
 
 #include <CL/sycl.hpp>
 
diff --git a/sycl/tools/CMakeLists.txt b/sycl/tools/CMakeLists.txt
index dd921f969c66..320ceb26bc10 100644
--- a/sycl/tools/CMakeLists.txt
+++ b/sycl/tools/CMakeLists.txt
@@ -5,20 +5,29 @@ set(CMAKE_CXX_EXTENSIONS OFF)
 add_executable(get_device_count_by_type get_device_count_by_type.cpp)
 add_dependencies(get_device_count_by_type ocl-headers ocl-icd)
 target_link_libraries(get_device_count_by_type
-    PRIVATE OpenCL::Headers
-    PRIVATE ${OpenCL_LIBRARIES}
+  PRIVATE
+    OpenCL::Headers
+    ${OpenCL_LIBRARIES}
+    $<$<BOOL:${SYCL_BUILD_PI_CUDA}>:cudadrv>
+
+)
+target_compile_definitions(get_device_count_by_type
+  PRIVATE
+    $<$<BOOL:${SYCL_BUILD_PI_CUDA}>:USE_PI_CUDA>
 )
 
 add_executable(sycl-check sycl-check.cpp)
 add_dependencies(sycl-check sycl)
 target_include_directories(sycl-check PRIVATE "${sycl_inc_dir}")
 target_link_libraries(sycl-check
-    PRIVATE sycl
-    PRIVATE OpenCL::Headers
-    PRIVATE ${OpenCL_LIBRARIES})
+  PRIVATE
+    sycl
+    OpenCL::Headers
+    ${OpenCL_LIBRARIES})
 
 #Minimum supported version of Intel's OCL GPU and CPU devices
 target_compile_definitions(sycl-check
-     PRIVATE  MIN_INTEL_OCL_GPU_VERSION=\"18.47.11882\"
-     PRIVATE  MIN_INTEL_OCL_CPU_VERSION=\"18.1.0.0901\",\"7.6.0.1202\"
+  PRIVATE
+    MIN_INTEL_OCL_GPU_VERSION=\"18.47.11882\"
+    MIN_INTEL_OCL_CPU_VERSION=\"18.1.0.0901\",\"7.6.0.1202\"
 )
diff --git a/sycl/tools/get_device_count_by_type.cpp b/sycl/tools/get_device_count_by_type.cpp
index 5611685889fa..d44ce55e079d 100644
--- a/sycl/tools/get_device_count_by_type.cpp
+++ b/sycl/tools/get_device_count_by_type.cpp
@@ -9,106 +9,204 @@
 #include <CL/cl.h>
 #include <CL/cl_ext.h>
 
+#include <cstdlib>
+
 #ifdef USE_PI_CUDA
-#include <cuda_driver.h>
-#endif  // USE_PI_CUDA
+#include <cuda_device_runtime_api.h>
+#include <cuda.h>
+#endif // USE_PI_CUDA
 
 #include <iostream>
+#include <sstream>
 #include <string>
 #include <vector>
 
 static const std::string help =
-"   Help\n"
-"   Example: ./get_device_count_by_type cpu opencl\n"
-"   Support types: cpu/gpu/accelerator/default/all\n"
-"   Support backends: cuda/opencl \n"
-"   Output format: <number_of_devices>:<additional_Information>";
-
-int main(int argc, char* argv[]) {
-    if (argc < 3) {
-        std::cout  
-            << "0:Please set a device type and backend to find" << std::endl
-            << help << std::endl;
-        return 0;
-    }
-
-    std::string type = argv[1];
-    std::string backend{argv[2]};
-
-    cl_uint deviceCount = 0;
+    "   Help\n"
+    "   Example: ./get_device_count_by_type cpu opencl\n"
+    "   Support types: cpu/gpu/accelerator/default/all\n"
+    "   Support backends: cuda/opencl \n"
+    "   Output format: <number_of_devices>:<additional_Information>";
+
+const char *deviceTypeToString(cl_device_type deviceType) {
+  const char *str = "unknown";
+  switch (deviceType) {
+  case CL_DEVICE_TYPE_CPU:
+    str = "cpu";
+    break;
+  case CL_DEVICE_TYPE_GPU:
+    str = "gpu";
+    break;
+  case CL_DEVICE_TYPE_ACCELERATOR:
+    str = "accelerator";
+    break;
+  case CL_DEVICE_TYPE_CUSTOM:
+    str = "custom";
+    break;
+  case CL_DEVICE_TYPE_DEFAULT:
+    str = "default";
+    break;
+  case CL_DEVICE_TYPE_ALL:
+    str = "all";
+    break;
+  default:
+    // str already set to express unknown device type.
+    break;
+  }
+
+  return str;
+}
 
-#ifdef USE_PI_CUDA
-    if (backend == "CUDA") {
-      std::string msg{""};
-
-      int runtime_version = 0;
-
-      cudaError_t err = cuDriverGetVersion(&runtime_version);
-      if (runtime_version < 9020 || err != CUDA_SUCCESS) {
-        std::cout << deviceCount << " :Unsupported CUDA Runtime " << std::endl;
-      }
-
-      if (type == "gpu") {
-        deviceCount = 1;
-        msg = "cuda";
-      } else {
-        msg = "Unsupported device type for CUDA backend";
-        msg += " type: ";
-        msg += type;
-      }
-      std::cout << deviceCount << " : " << msg << std::endl;
-      return 0;
+static bool queryOpenCL(cl_device_type deviceType, cl_uint &deviceCount,
+                        std::string &msg) {
+  deviceCount = 0u;
+  cl_int iRet = CL_SUCCESS;
+  cl_uint platformCount = 0;
+
+  iRet = clGetPlatformIDs(0, nullptr, &platformCount);
+  if (iRet != CL_SUCCESS) {
+    if (iRet == CL_PLATFORM_NOT_FOUND_KHR) {
+      msg = "OpenCL error runtime not found";
+    } else {
+      std::stringstream stream;
+      stream << "OpenCL error calling clGetPlatformIDs " << iRet << std::endl;
+      msg = stream.str();
     }
-#endif  // USE_PI_CUDA
-
-    cl_device_type device_type;
-    if (type == "cpu") {
-        device_type = CL_DEVICE_TYPE_CPU;
-    } else if (type == "gpu") {
-        device_type = CL_DEVICE_TYPE_GPU;
-    } else if (type == "accelerator") {
-        device_type = CL_DEVICE_TYPE_ACCELERATOR;
-    } else if (type == "default") {
-        device_type = CL_DEVICE_TYPE_DEFAULT;
-    } else if (type == "all") {
-        device_type = CL_DEVICE_TYPE_ALL;
-    } else  {
-        std::cout << "0:Incorrect device type." << std::endl
-            << help << std::endl;
-        return 0;
+    return false;
+  }
+
+  std::vector<cl_platform_id> platforms(platformCount);
+  iRet = clGetPlatformIDs(platformCount, &platforms[0], nullptr);
+  if (iRet != CL_SUCCESS) {
+    std::stringstream stream;
+    stream << "OpenCL error calling clGetPlatformIDs " << iRet << std::endl;
+    msg = stream.str();
+    return false;
+  }
+
+  for (cl_uint i = 0; i < platformCount; i++) {
+    cl_uint deviceCountPart = 0;
+    iRet =
+        clGetDeviceIDs(platforms[i], deviceType, 0, nullptr, &deviceCountPart);
+    if (iRet == CL_SUCCESS || iRet == CL_DEVICE_NOT_FOUND) {
+      deviceCount += deviceCountPart;
+    } else {
+      deviceCount = 0u;
+      std::stringstream stream;
+      stream << "OpenCL error calling clGetDeviceIDs " << iRet << std::endl;
+      msg = stream.str();
+      return false;
     }
+  }
 
-    cl_int iRet = CL_SUCCESS;
-    cl_uint platformCount = 0;
-
-    iRet = clGetPlatformIDs(0, nullptr, &platformCount);
-    if (iRet != CL_SUCCESS) {
-        if (iRet == CL_PLATFORM_NOT_FOUND_KHR) {
-            std::cout << "0:OpenCL runtime not found " << std::endl;
-        } else {
-            std::cout << "0:A problem at calling function clGetPlatformIDs count "
-                << iRet << std::endl;
-        }
-        return 0;
-    }
-
-    std::vector<cl_platform_id> platforms(platformCount);
-
-    iRet = clGetPlatformIDs(platformCount, &platforms[0], nullptr);
-    if (iRet != CL_SUCCESS) {
-        std::cout << "0:A problem at when calling function clGetPlatformIDs ids " << iRet << std::endl;
-        return 0;
-    }
+  msg = "opencl ";
+  msg += deviceTypeToString(deviceType);
+  return true;
+}
 
-    for (cl_uint i = 0; i < platformCount; i++) {
-        cl_uint deviceCountPart = 0;
-        iRet = clGetDeviceIDs(platforms[i], device_type, 0, nullptr, &deviceCountPart);
-        if (iRet == CL_SUCCESS) {
-            deviceCount += deviceCountPart;
-        }
+static bool queryCUDA(cl_device_type deviceType, cl_uint &deviceCount,
+                      std::string &msg) {
+  deviceCount = 0u;
+#ifdef USE_PI_CUDA
+  const unsigned int defaultFlag = 0;
+  CUresult err = cuInit(defaultFlag);
+  if (err != CUDA_SUCCESS) {
+    msg = "CUDA initialization error";
+    return false;
+  }
+
+  const int minRuntimeVersion = 10010;
+  int runtimeVersion = 0;
+  err = cuDriverGetVersion(&runtimeVersion);
+  if (err != CUDA_SUCCESS) {
+    msg = "CUDA error querying driver version";
+    return false;
+  }
+
+  if (runtimeVersion < minRuntimeVersion) {
+    std::stringstream stream;
+    stream << "CUDA version not supported " << runtimeVersion;
+    msg = stream.str();
+    return false;
+  }
+
+  switch (deviceType) {
+  case CL_DEVICE_TYPE_DEFAULT: // Fall through.
+  case CL_DEVICE_TYPE_ALL:     // Fall through.
+  case CL_DEVICE_TYPE_GPU: {
+    int count = 0;
+    CUresult err = cuDeviceGetCount(&count);
+    if (err != CUDA_SUCCESS || count < 0) {
+      msg = "CUDA error querying device count";
+      return false;
     }
 
-    std::cout << deviceCount << ":" << backend << std::endl;
+    deviceCount = static_cast<cl_uint>(count);
+    msg = "cuda ";
+    msg += deviceTypeToString(deviceType);
+    return true;
+  } break;
+  default:
+    msg = "CUDA unsupported device type ";
+    msg += deviceTypeToString(deviceType);
+    return false;
+  }
+#else
+  msg = "CUDA not supported";
+  deviceCount = 0u;
+
+  return false;
+#endif
+}
 
-    return 0;
+int main(int argc, char *argv[]) {
+  if (argc < 3) {
+    std::cout << "0:Please set a device type and backend to find" << std::endl
+              << help << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  std::string type = argv[1];
+  std::string backend{argv[2]};
+
+  cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT;
+  if (type == "cpu") {
+    deviceType = CL_DEVICE_TYPE_CPU;
+  } else if (type == "gpu") {
+    deviceType = CL_DEVICE_TYPE_GPU;
+  } else if (type == "accelerator") {
+    deviceType = CL_DEVICE_TYPE_ACCELERATOR;
+  } else if (type == "default") {
+    deviceType = CL_DEVICE_TYPE_DEFAULT;
+  } else if (type == "all") {
+    deviceType = CL_DEVICE_TYPE_ALL;
+  } else {
+    std::cout << "0:Incorrect device type " << type << "\n"
+              << help << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  std::string msg;
+  cl_uint deviceCount = 0;
+
+  bool querySuccess = false;
+
+  if (backend == "opencl" || backend == "OpenCL" || backend == "OPENCL" ||
+      backend == "PI_OPENCL") {
+    querySuccess = queryOpenCL(deviceType, deviceCount, msg);
+  } else if (backend == "cuda" || backend == "CUDA" || backend == "PI_CUDA") {
+    querySuccess = queryCUDA(deviceType, deviceCount, msg);
+  } else {
+    std::stringstream stream;
+    stream << "Unknown backend" << backend << "\n" << help << std::endl;
+    msg = stream.str();
+  }
+
+  std::cout << deviceCount << ":" << msg << std::endl;
+
+  if (!querySuccess) {
+    return EXIT_FAILURE;
+  }
+
+  return EXIT_SUCCESS;
 }