From 1684403d73cc9529eccbbcb932d71fb1d27d39cb Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Fri, 30 Apr 2021 17:13:42 -0500 Subject: [PATCH 1/2] Various code style automation for dpctl 1. `flake8`, `isort` jobs added to check for Python code style for new PRs against `master`, `gold/2021`, `release` branches. 2. `black` max line width changed to 80 chars. 3. `flake8` and `isort` configs added. 4. `pre-commit` hooks added to make it possible to run all these checks prior to pushing code. 5. `CONTRIBUTION.md` updated. --- .flake8 | 28 ++++++++++ .github/workflows/cpp_style_checks.yml | 22 ++++++++ .github/workflows/python_style_checks.yml | 58 ++++++++++++++++++++ .pre-commit-config.yaml | 29 ++++++++++ CONTRIBUTING.md | 64 +++++++++++++++++------ README.md | 1 + pyproject.toml | 12 ++++- setup.cfg | 5 -- 8 files changed, 196 insertions(+), 23 deletions(-) create mode 100644 .flake8 create mode 100644 .github/workflows/cpp_style_checks.yml create mode 100644 .github/workflows/python_style_checks.yml diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000..bbe8a35244 --- /dev/null +++ b/.flake8 @@ -0,0 +1,28 @@ +[flake8] +filename = *.py, *.pyx +max_line_length = 80 +max-doc-length = 80 +extend-ignore = E203, W503 +show-source = True + +exclude = + versioneer.py + dpctl/_version.py + build + conda.recipe + .git + +per-file-ignores = + dpctl/_sycl_context.pyx: E999, E225, E227 + dpctl/_sycl_device.pyx: E999, E225 + dpctl/_sycl_device_factory.pyx: E999, E225 + dpctl/_sycl_event.pyx: E999, E225 + dpctl/_sycl_platform.pyx: E999, E225 + dpctl/_sycl_queue.pyx: E999, E225, E226, E227 + dpctl/_sycl_queue_manager.pyx: E999, E225 + dpctl/memory/_memory.pyx: E999, E225, E226, E227 + dpctl/program/_program.pyx: E999, E225, E226, E227 + dpctl/tensor/numpy_usm_shared.py: F821 + examples/cython/sycl_buffer/_buffer_example.pyx: E999, E225, E402 + examples/cython/sycl_direct_linkage/_buffer_example.pyx: E999, E225, E402 + examples/cython/usm_memory/blackscholes.pyx: 
E999, E225, E226, E402 diff --git a/.github/workflows/cpp_style_checks.yml b/.github/workflows/cpp_style_checks.yml new file mode 100644 index 0000000000..b5a7def26f --- /dev/null +++ b/.github/workflows/cpp_style_checks.yml @@ -0,0 +1,22 @@ +# This is a workflow to format C/C++ sources with clang-format + +name: C++ Code Style + +# Controls when the action will run. Triggers the workflow on push or pull request +# events but only for the master branch +on: + pull_request: + push: + branches: [master] + +jobs: + formatting-check: + name: clang-format + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Run clang-format style check for C/C++ programs. + uses: jidicula/clang-format-action@v3.1.0 + with: + clang-format-version: '11' + check-path: 'dpctl-capi' diff --git a/.github/workflows/python_style_checks.yml b/.github/workflows/python_style_checks.yml new file mode 100644 index 0000000000..f48e567625 --- /dev/null +++ b/.github/workflows/python_style_checks.yml @@ -0,0 +1,58 @@ +# This is a workflow to format Python code with black formatter + +name: Python Code Style + +# Controls when the action will run. Triggers the workflow on push or pull request +# events but only for the master branch +on: + pull_request: + push: + branches: [master] + +# A workflow run is made up of one or more jobs that can run sequentially or in parallel +jobs: + # The isort job sorts all imports in .py, .pyx, .pxd files + isort: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - uses: jamescurtin/isort-action@master + with: + args: ". 
--check-only" + + black: + # The type of runner that the job will run on + runs-on: ubuntu-latest + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + # Set up a Python environment for use in actions + - uses: actions/setup-python@v2 + + # Run black code formatter + - uses: psf/black@21.4b2 + with: + args: ". --check" + + flake8: + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: [3.7] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install flake8 + - name: Lint with flake8 + uses: py-actions/flake8@v1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 51a095222b..0554e90007 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,3 +7,32 @@ repos: - id: bandit pass_filenames: false args: ["-r", "dpctl", "-lll"] +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/psf/black + rev: 21.4b2 + hooks: + - id: black + exclude: "versioneer.py|dpctl/_version.py" +- repo: https://github.com/pycqa/isort + rev: 5.8.0 + hooks: + - id: isort + name: isort (python) + - id: isort + name: isort (cython) + types: [cython] + - id: isort + name: isort (pyi) + types: [pyi] +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.9.1 + hooks: + - id: flake8 +- repo: https://github.com/pocc/pre-commit-hooks + rev: v1.1.1 + hooks: + - id: clang-format diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index efa64123c5..c43830c903 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -15,12 +15,30 @@ Run before each commit: `clang-format -style=file -i dpctl-capi/include/*.h dpct 
### Python code style -We use [black](https://black.readthedocs.io/en/stable/) code formatter. +We use the following Python code style tools: +- [black](https://black.readthedocs.io/en/stable/) code formatter. + - Revision: `20.8b1`. +- [flake8](https://flake8.pycqa.org/en/latest/) linter. + - Revision `3.9.1`. +- [isort](https://pycqa.github.io/isort/) import sorter. + - Revision `5.8.0`. -- Revision: `20.8b1`. -- See configuration in `pyproject.toml`. +- Refer `pyproject.toml` and `.flake8` config files for current configurations. -Run before each commit: `black .` +Please run these three tools before each commit. Although, you may choose to +do so manually, but it is much easier and preferable to automate these checks. +Refer your IDE docs to set them up in your IDE, or you can set up `pre-commit` +to add git hooks. + +### Setting up pre-commit + +A `.pre-commit-config.yaml` is included to run various check before you +commit your code. Here are the steps to setup `pre-commit` in your workflow: + +- Install `pre-commit`: `pip install pre-commit` +- Install the git hook scripts: `pre-commit install` + +That should be it! ### C/C++ File Headers @@ -61,8 +79,9 @@ Few things to note about this format: - The copyright year should be updated every calendar year. - Each comment line should be a max of 80 chars. - A Doxygen `\file` tag describing the contents of the file must be provided. - Also note that the `\file` tag is inside a Doxygen comment block (defined by `///` - comment marker instead of the `//` comment marker used in the rest of the header. + Also note that the `\file` tag is inside a Doxygen comment block ( + defined by `///` comment marker instead of the `//` comment marker used in the + rest of the header. ### Python File Headers @@ -91,7 +110,8 @@ The copyright year should be updated every calendar year. ### Bandit -We use [Bandit](https://github.com/PyCQA/bandit) to find common security issues in Python code. 
+We use [Bandit](https://github.com/PyCQA/bandit) to find common security issues +in Python code. Install: `pip install bandit` @@ -101,18 +121,21 @@ Run before each commit: `bandit -r dpctl -lll` ## Code Coverage -Implement python, cython and c++ file coverage using `coverage` and `llvm-cov` packages on Linux. +Implement python, cython and c++ file coverage using `coverage` and `llvm-cov` +packages on Linux. ### Using Code Coverage -You need to install additional packages and add an environment variable to cover: +You need to install additional packages and add an environment variable to +cover: - conda install cmake - conda install coverage - conda install conda-forge::lcov - conda install conda-forge::gtest - export CODE_COVERAGE=ON -CODE_COVERAGE allows you to build a debug version of dpctl and enable string tracing, which allows you to analyze strings to create a coverage report. +CODE_COVERAGE allows you to build a debug version of dpctl and enable string +tracing, which allows you to analyze strings to create a coverage report. It was added for the convenience of configuring the CI in the future. Installing the dpctl package: @@ -121,19 +144,24 @@ Installing the dpctl package: It is important that there are no files of the old build in the folder. Use `git clean -xdf` to clean up the working tree. -The coverage report will appear during the build in the console. This report contains information about c++ file coverage. -The `dpctl-c-api-coverage` folder will appear in the root folder after installation. -The folder contains a report on the coverage of c++ files in html format. +The coverage report will appear during the build in the console. This report +contains information about c++ file coverage. +The `dpctl-c-api-coverage` folder will appear in the root folder after +installation. The folder contains a report on the coverage of c++ files in html +format. 
You need to run tests to cover the cython and python files: - coverage run -m unittest dpctl.tests -The required flags for the command coverage run are contained in the file `.coveragerc`. +The required flags for the command coverage run are contained in the file +`.coveragerc`. The simplest reporting is a textual summary produced with report: - coverage report -For each module executed, the report shows the count of executable statements, the number of those statements missed, and the resulting coverage, expressed as a percentage. +For each module executed, the report shows the count of executable statements, +the number of those statements missed, and the resulting coverage, expressed as +a percentage. The `-m` flag also shows the line numbers of missing statements: - coverage report -m @@ -141,14 +169,16 @@ The `-m` flag also shows the line numbers of missing statements: To create an annotated HTML listings with coverage results: - coverage html -The `htmlcov` folder will appear in the root folder of the project. It contains reports on the coverage of python and cython files in html format. +The `htmlcov` folder will appear in the root folder of the project. It contains +reports on the coverage of python and cython files in html format. Erase previously collected coverage data: - coverage erase ### Error in the build process -An error occurs during the dcptl build with the CODE_COVERAGE environment variable: +An error occurs during the dcptl build with the CODE_COVERAGE environment +variable: ``` error: '../compat/unistd.h' file not found, did you mean 'compat/unistd.h'? 
# include "../compat/unistd.h" diff --git a/README.md b/README.md index bcdd7fb9c5..224176e268 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +[![Imports: isort](https://img.shields.io/badge/%20imports-isort-%231674b1?style=flat&labelColor=ef8336)](https://pycqa.github.io/isort/) What? ==== diff --git a/pyproject.toml b/pyproject.toml index 2b233b61c9..7598e26713 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,12 @@ [tool.black] -exclude = 'versioneer.py' +exclude = "versioneer.py|dpctl/_version.py" +line-length = 80 + +[tool.isort] +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true +line_length = 80 +skip = ["versioneer.py", "dpctl/_version.py"] diff --git a/setup.cfg b/setup.cfg index f360aaf2d3..e754d4e620 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,8 +1,3 @@ -[flake8] -max-line-length = 100 -ignore = E122,E123,E126,E127,E128,E731,E722 -exclude = build,dpctl/_version.py,tests,conda.recipe,.git,versioneer.py,benchmarks,.asv - [tool:pytest] norecursedirs= .* *.egg* build dist conda.recipe addopts = From 084ba59ce87efb55e09fff6ae33665d0f14f22b5 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Fri, 30 Apr 2021 17:17:56 -0500 Subject: [PATCH 2/2] Code changes to comply with latest linters --- docs/conf.in | 6 +- dpctl/__init__.py | 129 ++++++--- dpctl/_backend.pxd | 2 +- dpctl/_sycl_context.pxd | 4 +- dpctl/_sycl_context.pyx | 57 ++-- dpctl/_sycl_device.pxd | 5 +- dpctl/_sycl_device.pyx | 263 ++++++++++-------- dpctl/_sycl_device_factory.pxd | 2 + dpctl/_sycl_device_factory.pyx | 105 ++++--- dpctl/_sycl_event.pyx | 14 +- dpctl/_sycl_platform.pxd | 5 +- dpctl/_sycl_platform.pyx | 35 ++- dpctl/_sycl_queue.pxd | 12 +- dpctl/_sycl_queue.pyx | 152 +++++----- dpctl/_sycl_queue_manager.pyx | 24 +- dpctl/enum_types.py | 1 - dpctl/memory/__init__.py | 5 +- 
dpctl/memory/_memory.pxd | 2 +- dpctl/memory/_memory.pyx | 165 +++++++---- dpctl/program/__init__.py | 17 +- dpctl/program/_program.pyx | 67 +++-- dpctl/tensor/__init__.py | 11 +- dpctl/tensor/numpy_usm_shared.py | 62 +++-- dpctl/tests/__init__.py | 12 - dpctl/tests/test_dparray.py | 4 +- dpctl/tests/test_sycl_context.py | 28 +- dpctl/tests/test_sycl_device.py | 51 +++- dpctl/tests/test_sycl_device_factory.py | 6 +- dpctl/tests/test_sycl_kernel_submit.py | 9 +- dpctl/tests/test_sycl_platform.py | 6 +- dpctl/tests/test_sycl_program.py | 12 +- dpctl/tests/test_sycl_queue.py | 24 +- dpctl/tests/test_sycl_queue_manager.py | 20 +- dpctl/tests/test_sycl_queue_memcpy.py | 8 +- dpctl/tests/test_sycl_usm.py | 22 +- .../cython/sycl_buffer/_buffer_example.pyx | 20 +- examples/cython/sycl_buffer/bench.py | 13 +- examples/cython/sycl_buffer/run.py | 3 +- examples/cython/sycl_buffer/setup.py | 20 +- .../sycl_direct_linkage/_buffer_example.pyx | 15 +- examples/cython/sycl_direct_linkage/bench.py | 16 +- examples/cython/sycl_direct_linkage/run.py | 2 +- examples/cython/sycl_direct_linkage/setup.py | 20 +- examples/cython/usm_memory/blackscholes.pyx | 86 ++++-- examples/cython/usm_memory/run.py | 42 ++- examples/cython/usm_memory/setup.py | 20 +- examples/python/_runner.py | 10 +- examples/python/subdevices.py | 4 +- examples/python/usm_memory_allocation.py | 4 +- examples/python/usm_memory_host_access.py | 7 +- examples/python/usm_memory_operation.py | 10 +- scripts/build_backend.py | 28 +- setup.py | 23 +- 53 files changed, 1062 insertions(+), 628 deletions(-) diff --git a/docs/conf.in b/docs/conf.in index cf099565f7..75e7f60981 100644 --- a/docs/conf.in +++ b/docs/conf.in @@ -41,7 +41,8 @@ use_doxyrest = "@DPCTL_ENABLE_DOXYREST@" if use_doxyrest == "ON": # Specify the path to Doxyrest extensions for Sphinx: - import sys, os + import os + import sys sys.path.insert( 1, @@ -168,9 +169,8 @@ class ClassMembersDocumenter(ClassDocumenter): # members and attributes. 
# See https://stackoverflow.com/questions/20569011/python-sphinx-autosummary-automated-listing-of-member-functions -from sphinx.ext.autosummary import Autosummary -from sphinx.ext.autosummary import get_documenter from docutils.parsers.rst import directives +from sphinx.ext.autosummary import Autosummary, get_documenter from sphinx.util.inspect import safe_getattr diff --git a/dpctl/__init__.py b/dpctl/__init__.py index 47a2a58253..d4075edc7e 100644 --- a/dpctl/__init__.py +++ b/dpctl/__init__.py @@ -19,45 +19,104 @@ Dpctl's Python API implements Python wrappers for a subset of DPC++/SYCL's API. The Python API exposes wrappers for the SYCL runtime classes (expect - `device_selector`) described in Section 4.6 of the SYCL 2020 spec (https://www.khronos.org/registry/SYCL/specs/sycl-2020/html/sycl-2020.html#_sycl_runtime_classes). - Apart from the main SYCL runtime classes, dpctl includes a `memory` sub-module that exposes the SYCL USM allocators and deallocators. + ``device_selector``) described in Section 4.6 of the + [SYCL 2020 spec](https://www.khronos.org/registry/SYCL/specs/sycl-2020/ +html/sycl-2020.html#_sycl_runtime_classes). + Apart from the main SYCL runtime classes, dpctl includes a `memory` + sub-module that exposes the SYCL USM allocators and deallocators. """ __author__ = "Intel Corp." 
-from .enum_types import * -from .enum_types import __all__ as _enum_types_all__ -from dpctl._sycl_context import * -from dpctl._sycl_context import __all__ as _sycl_context__all__ -from dpctl._sycl_device import * -from dpctl._sycl_device import __all__ as _sycl_device__all__ -from dpctl._sycl_device_factory import * -from dpctl._sycl_device_factory import __all__ as _sycl_device_factory__all__ -from dpctl._sycl_event import * -from dpctl._sycl_event import __all__ as _sycl_event__all__ -from dpctl._sycl_platform import * -from dpctl._sycl_platform import __all__ as _sycl_platform__all__ -from dpctl._sycl_queue import * -from dpctl._sycl_queue import __all__ as _sycl_queue__all__ -from dpctl._sycl_queue_manager import * -from dpctl._sycl_queue_manager import __all__ as _sycl_qm__all__ -from ._version import get_versions +from dpctl._sycl_context import SyclContext +from dpctl._sycl_device import SyclDevice +from dpctl._sycl_device_factory import ( + get_devices, + get_num_devices, + has_accelerator_devices, + has_cpu_devices, + has_gpu_devices, + has_host_device, + select_accelerator_device, + select_cpu_device, + select_default_device, + select_gpu_device, + select_host_device, +) +from dpctl._sycl_event import SyclEvent +from dpctl._sycl_platform import SyclPlatform, get_platforms, lsplatform +from dpctl._sycl_queue import ( + SyclKernelInvalidRangeError, + SyclKernelSubmitError, + SyclQueue, + SyclQueueCreationError, +) +from dpctl._sycl_queue_manager import ( + device_context, + get_current_backend, + get_current_device_type, + get_current_queue, + get_num_activated_queues, + is_in_device_context, + set_global_queue, +) +from ._version import get_versions +from .enum_types import backend_type, device_type -__all__ = ( - _sycl_context__all__ - + _sycl_device__all__ - + _sycl_device_factory__all__ - + _sycl_event__all__ - + _sycl_platform__all__ - + _sycl_queue__all__ - + _sycl_qm__all__ - + _enum_types_all__ -) +__all__ = [ + "SyclContext", +] +__all__ += [ + 
"SyclDevice", +] +__all__ += [ + "get_devices", + "select_accelerator_device", + "select_cpu_device", + "select_default_device", + "select_gpu_device", + "select_host_device", + "get_num_devices", + "has_cpu_devices", + "has_gpu_devices", + "has_accelerator_devices", + "has_host_device", +] +__all__ += [ + "SyclEvent", +] +__all__ += [ + "get_platforms", + "lsplatform", + "SyclPlatform", +] +__all__ += [ + "SyclQueue", + "SyclKernelInvalidRangeError", + "SyclKernelSubmitError", + "SyclQueueCreationError", +] +__all__ += [ + "device_context", + "get_current_backend", + "get_current_device_type", + "get_current_queue", + "get_num_activated_queues", + "is_in_device_context", + "set_global_queue", +] +__all__ += [ + "device_type", + "backend_type", +] +__all__ += [ + "get_include", +] def get_include(): - r""" - Return the directory that contains the dpctl \*.h header files. + """ + Return the directory that contains the dpctl *.h header files. Extension modules that need to be compiled against dpctl should use this function to locate the appropriate include directory. @@ -69,11 +128,3 @@ def get_include(): __version__ = get_versions()["version"] del get_versions -del _sycl_context__all__ -del _sycl_device__all__ -del _sycl_device_factory__all__ -del _sycl_event__all__ -del _sycl_queue__all__ -del _sycl_qm__all__ -del _sycl_platform__all__ -del _enum_types_all__ diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index cb6188d8a2..f6e747d24b 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -21,8 +21,8 @@ types defined by dpctl's C API. """ +from libc.stdint cimport int64_t, uint32_t from libcpp cimport bool -from libc.stdint cimport uint32_t, int64_t cdef extern from "dpctl_error_handler_type.h": diff --git a/dpctl/_sycl_context.pxd b/dpctl/_sycl_context.pxd index c97a1bddb8..0549682b78 100644 --- a/dpctl/_sycl_context.pxd +++ b/dpctl/_sycl_context.pxd @@ -20,9 +20,11 @@ """ This file declares the SyclContext extension type. 
""" +from libcpp cimport bool + from ._backend cimport DPCTLSyclContextRef from ._sycl_device cimport SyclDevice -from libcpp cimport bool + cdef class _SyclContext: """ Data owner for SyclContext diff --git a/dpctl/_sycl_context.pyx b/dpctl/_sycl_context.pyx index a7eda8c328..cca15f3ee6 100644 --- a/dpctl/_sycl_context.pyx +++ b/dpctl/_sycl_context.pyx @@ -21,31 +21,34 @@ """ from __future__ import print_function + import logging -from ._backend cimport( - DPCTLSyclContextRef, - DPCTLSyclDeviceRef, + +from cpython cimport pycapsule +from cpython.mem cimport PyMem_Free, PyMem_Malloc + +from ._backend cimport ( # noqa: E211 + DPCTLContext_AreEq, + DPCTLContext_Copy, DPCTLContext_Create, DPCTLContext_CreateFromDevices, + DPCTLContext_Delete, DPCTLContext_DeviceCount, DPCTLContext_GetDevices, - DPCTLContext_Copy, - DPCTLContext_Delete, - DPCTLContext_AreEq, - DPCTLDevice_Delete, DPCTLDevice_Copy, - DPCTLDeviceVectorRef, + DPCTLDevice_Delete, + DPCTLDeviceMgr_GetCachedContext, DPCTLDeviceVector_CreateFromArray, + DPCTLDeviceVector_Delete, DPCTLDeviceVector_GetAt, DPCTLDeviceVector_Size, - DPCTLDeviceVector_Delete, + DPCTLDeviceVectorRef, + DPCTLSyclContextRef, + DPCTLSyclDeviceRef, error_handler_callback, - DPCTLDeviceMgr_GetCachedContext, ) -from ._sycl_queue cimport default_async_error_handler from ._sycl_device cimport SyclDevice -from cpython.mem cimport PyMem_Malloc, PyMem_Free -from cpython cimport pycapsule +from ._sycl_queue cimport default_async_error_handler __all__ = [ "SyclContext", @@ -56,7 +59,9 @@ _logger = logging.getLogger(__name__) cdef void _context_capsule_deleter(object o): cdef DPCTLSyclContextRef CRef = NULL if pycapsule.PyCapsule_IsValid(o, "SyclContextRef"): - CRef = pycapsule.PyCapsule_GetPointer(o, "SyclContextRef") + CRef = pycapsule.PyCapsule_GetPointer( + o, "SyclContextRef" + ) DPCTLContext_Delete(CRef) @@ -124,7 +129,7 @@ cdef class SyclContext(_SyclContext): # Create a CPU device using the opencl driver cpu_d = 
dpctl.SyclDevice("opencl:cpu") - # Partition the CPU device into sub-devices, each with two cores. + # Partition the CPU device into sub-devices with two cores each. sub_devices = cpu_d.create_sub_devices(partition=2) # Create a context common to all the sub-devices. ctx = dpctl.SyclContext(sub_devices) @@ -154,7 +159,7 @@ cdef class SyclContext(_SyclContext): @staticmethod cdef void _init_helper(_SyclContext context, DPCTLSyclContextRef CRef): - context._ctxt_ref = CRef + context._ctxt_ref = CRef @staticmethod cdef SyclContext _create(DPCTLSyclContextRef ctxt): @@ -254,8 +259,8 @@ cdef class SyclContext(_SyclContext): return 0 else: # __cinit__ checks that capsule is valid, so one can be here only - # if call to `_init_context_from_capsule` was made outside of __cinit__ - # and the capsule was not checked to be valid + # if call to `_init_context_from_capsule` was made outside of + # __cinit__ and the capsule was not checked to be valid return -128 def __cinit__(self, arg=None): @@ -266,8 +271,8 @@ cdef class SyclContext(_SyclContext): ret = self._init_context_from_one_device( arg, 0) elif pycapsule.PyCapsule_IsValid(arg, "SyclContextRef"): status = self._init_context_from_capsule(arg) - elif isinstance( - arg, (list, tuple)) and all([isinstance(argi, SyclDevice) for argi in arg] + elif isinstance(arg, (list, tuple)) and all( + [isinstance(argi, SyclDevice) for argi in arg] ): ret = self._init_context_from_devices(arg, 0) else: @@ -333,7 +338,7 @@ cdef class SyclContext(_SyclContext): cdef DPCTLSyclContextRef get_context_ref(self): return self._ctxt_ref - def addressof_ref (self): + def addressof_ref(self): """ Returns the address of the ``DPCTLSyclContextRef`` pointer as a ``size_t``. 
@@ -416,7 +421,8 @@ cdef class SyclContext(_SyclContext): cpu_d = dpctl.SyclDevice("opencl:cpu") sub_devices = create_sub_devices(partition=2) ctx2 = dpctl.SyclContext(sub_devices) - print(ctx2) # E.g : + # prints: + print(ctx2) Returns: :obj:`str`: A string representation of the @@ -427,8 +433,11 @@ cdef class SyclContext(_SyclContext): if n == 1: return ("".format(hex(id(self)))) else: - return ("" - .format(n, hex(id(self)))) + return ( + "".format(n, hex(id(self))) + ) def _get_capsule(self): """ diff --git a/dpctl/_sycl_device.pxd b/dpctl/_sycl_device.pxd index b14d3fa22f..b3923f50e3 100644 --- a/dpctl/_sycl_device.pxd +++ b/dpctl/_sycl_device.pxd @@ -20,12 +20,13 @@ """ This file declares the SyclDevice extension type. """ +from libcpp cimport bool as cpp_bool + from ._backend cimport ( DPCTLSyclDeviceRef, DPCTLSyclDeviceSelectorRef, - _partition_affinity_domain_type + _partition_affinity_domain_type, ) -from libcpp cimport bool as cpp_bool cdef class _SyclDevice: diff --git a/dpctl/_sycl_device.pyx b/dpctl/_sycl_device.pyx index a0f44569c0..34ba8eae19 100644 --- a/dpctl/_sycl_device.pyx +++ b/dpctl/_sycl_device.pyx @@ -20,71 +20,74 @@ """ Implements SyclDevice Cython extension type. 
""" -from ._backend cimport ( - _aspect_type, - _backend_type, - _device_type, - _partition_affinity_domain_type, +from ._backend cimport ( # noqa: E211 DPCTLCString_Delete, DPCTLDefaultSelector_Create, + DPCTLDevice_AreEq, DPCTLDevice_Copy, DPCTLDevice_CreateFromSelector, + DPCTLDevice_CreateSubDevicesByAffinity, + DPCTLDevice_CreateSubDevicesByCounts, + DPCTLDevice_CreateSubDevicesEqually, DPCTLDevice_Delete, - DPCTLDeviceVectorRef, - DPCTLDeviceVector_Delete, - DPCTLDeviceVector_GetAt, - DPCTLDeviceVector_Size, DPCTLDevice_GetBackend, - DPCTLDevice_AreEq, DPCTLDevice_GetDeviceType, DPCTLDevice_GetDriverVersion, + DPCTLDevice_GetImage2dMaxHeight, + DPCTLDevice_GetImage2dMaxWidth, + DPCTLDevice_GetImage3dMaxDepth, + DPCTLDevice_GetImage3dMaxHeight, + DPCTLDevice_GetImage3dMaxWidth, DPCTLDevice_GetMaxComputeUnits, DPCTLDevice_GetMaxNumSubGroups, + DPCTLDevice_GetMaxReadImageArgs, DPCTLDevice_GetMaxWorkGroupSize, DPCTLDevice_GetMaxWorkItemDims, DPCTLDevice_GetMaxWorkItemSizes, - DPCTLDevice_GetVendor, + DPCTLDevice_GetMaxWriteImageArgs, DPCTLDevice_GetName, + DPCTLDevice_GetParentDevice, + DPCTLDevice_GetPreferredVectorWidthChar, + DPCTLDevice_GetPreferredVectorWidthDouble, + DPCTLDevice_GetPreferredVectorWidthFloat, + DPCTLDevice_GetPreferredVectorWidthHalf, + DPCTLDevice_GetPreferredVectorWidthInt, + DPCTLDevice_GetPreferredVectorWidthLong, + DPCTLDevice_GetPreferredVectorWidthShort, + DPCTLDevice_GetSubGroupIndependentForwardProgress, + DPCTLDevice_GetVendor, + DPCTLDevice_HasAspect, DPCTLDevice_IsAccelerator, DPCTLDevice_IsCPU, DPCTLDevice_IsGPU, DPCTLDevice_IsHost, - DPCTLDeviceMgr_PrintDeviceInfo, DPCTLDeviceMgr_GetRelativeId, - DPCTLFilterSelector_Create, + DPCTLDeviceMgr_PrintDeviceInfo, DPCTLDeviceSelector_Delete, DPCTLDeviceSelector_Score, + DPCTLDeviceVector_Delete, + DPCTLDeviceVector_GetAt, + DPCTLDeviceVector_Size, + DPCTLDeviceVectorRef, + DPCTLFilterSelector_Create, DPCTLSize_t_Array_Delete, DPCTLSyclBackendType, DPCTLSyclDeviceRef, 
DPCTLSyclDeviceSelectorRef, - DPCTLDevice_HasAspect, DPCTLSyclDeviceType, - DPCTLDevice_GetMaxReadImageArgs, - DPCTLDevice_GetMaxWriteImageArgs, - DPCTLDevice_GetImage2dMaxWidth, - DPCTLDevice_GetImage2dMaxHeight, - DPCTLDevice_GetImage3dMaxWidth, - DPCTLDevice_GetImage3dMaxHeight, - DPCTLDevice_GetImage3dMaxDepth, - DPCTLDevice_GetSubGroupIndependentForwardProgress, - DPCTLDevice_GetPreferredVectorWidthChar, - DPCTLDevice_GetPreferredVectorWidthShort, - DPCTLDevice_GetPreferredVectorWidthInt, - DPCTLDevice_GetPreferredVectorWidthLong, - DPCTLDevice_GetPreferredVectorWidthFloat, - DPCTLDevice_GetPreferredVectorWidthDouble, - DPCTLDevice_GetPreferredVectorWidthHalf, - DPCTLDevice_CreateSubDevicesEqually, - DPCTLDevice_CreateSubDevicesByCounts, - DPCTLDevice_CreateSubDevicesByAffinity, - DPCTLDevice_GetParentDevice, + _aspect_type, + _backend_type, + _device_type, + _partition_affinity_domain_type, ) -from . import backend_type, device_type -from libc.stdint cimport uint32_t, int64_t -from libc.stdlib cimport malloc, free -import warnings + +from .enum_types import backend_type, device_type + +from libc.stdint cimport int64_t, uint32_t +from libc.stdlib cimport free, malloc + import collections +import warnings __all__ = [ "SyclDevice", @@ -101,7 +104,8 @@ cdef class SubDeviceCreationError(Exception): cdef class _SyclDevice: - """ A helper data-owner class to abstract a cl::sycl::device instance. + """ + A helper data-owner class to abstract a cl::sycl::device instance. 
""" def __dealloc__(self): @@ -130,31 +134,31 @@ cdef list _get_devices(DPCTLDeviceVectorRef DVRef): cdef str _backend_type_to_filter_string_part(DPCTLSyclBackendType BTy): - if BTy == _backend_type._CUDA: - return "cuda" - elif BTy == _backend_type._HOST: - return "host" - elif BTy == _backend_type._LEVEL_ZERO: - return "level_zero" - elif BTy == _backend_type._OPENCL: - return "opencl" - else: - return "unknown" + if BTy == _backend_type._CUDA: + return "cuda" + elif BTy == _backend_type._HOST: + return "host" + elif BTy == _backend_type._LEVEL_ZERO: + return "level_zero" + elif BTy == _backend_type._OPENCL: + return "opencl" + else: + return "unknown" cdef str _device_type_to_filter_string_part(DPCTLSyclDeviceType DTy): - if DTy == _device_type._ACCELERATOR: - return "accelerator" - elif DTy == _device_type._AUTOMATIC: - return "automatic" - elif DTy == _device_type._CPU: - return "cpu" - elif DTy == _device_type._GPU: - return "gpu" - elif DTy == _device_type._HOST_DEVICE: - return "host" - else: - return "unknown" + if DTy == _device_type._ACCELERATOR: + return "accelerator" + elif DTy == _device_type._AUTOMATIC: + return "automatic" + elif DTy == _device_type._CPU: + return "cpu" + elif DTy == _device_type._GPU: + return "gpu" + elif DTy == _device_type._HOST_DEVICE: + return "host" + else: + return "unknown" cdef class SyclDevice(_SyclDevice): @@ -162,17 +166,17 @@ cdef class SyclDevice(_SyclDevice): There are two ways of creating a SyclDevice instance: - - by directly passing in a filter string to the class constructor. The - filter string needs to conform to the the `DPC++ filter selector SYCL - extension `_. + - by directly passing in a filter string to the class + constructor. The filter string needs to conform to the + `DPC++ filter selector SYCL extension `_. :Example: .. code-block:: python import dpctl - # Create a SyclDevice with an explicit filter string, in - # this case the first level_zero gpu device. 
+ # Create a SyclDevice with an explicit filter string, + # in this case the first level_zero gpu device. level_zero_gpu = dpctl.SyclDevice("level_zero:gpu:0"): level_zero_gpu.print_device_info() @@ -224,7 +228,7 @@ cdef class SyclDevice(_SyclDevice): return -1 self._name = DPCTLDevice_GetName(self._device_ref) self._driver_version = DPCTLDevice_GetDriverVersion(self._device_ref) - self._max_work_item_sizes = ( + self._max_work_item_sizes = ( DPCTLDevice_GetMaxWorkItemSizes(self._device_ref) ) self._vendor = DPCTLDevice_GetVendor(self._device_ref) @@ -288,7 +292,7 @@ cdef class SyclDevice(_SyclDevice): """ DPCTLDeviceMgr_PrintDeviceInfo(self._device_ref) - cdef DPCTLSyclDeviceRef get_device_ref (self): + cdef DPCTLSyclDeviceRef get_device_ref(self): """ Returns the DPCTLSyclDeviceRef pointer for this class. """ return self._device_ref @@ -539,10 +543,10 @@ cdef class SyclDevice(_SyclDevice): @property def max_work_item_dims(self): - """ Returns the maximum dimensions that specify - the global and local work-item IDs used by the - data parallel execution model. The cb - value is 3 if this SYCL device is not of device + """ Returns the maximum dimensions that specify the global and local + work-item IDs used by the data parallel execution model. + + The cb value is 3 if this SYCL device is not of device type ``info::device_type::custom``. """ cdef uint32_t max_work_item_dims = 0 @@ -551,11 +555,10 @@ cdef class SyclDevice(_SyclDevice): @property def max_work_item_sizes(self): - """ Returns the maximum number of work-items - that are permitted in each dimension of the - work-group of the nd_range. The minimum - value is (1; 1; 1) for devices that are not of - device type ``info::device_type::custom``. + """ Returns the maximum number of work-items that are permitted in each + dimension of the work-group of the nd_range. The minimum value is + `(1; 1; 1)` for devices that are not of device type + ``info::device_type::custom``. 
""" return ( self._max_work_item_sizes[0], @@ -565,8 +568,8 @@ cdef class SyclDevice(_SyclDevice): @property def max_compute_units(self): - """ Returns the number of parallel compute units - available to the device. The minimum value is 1. + """ Returns the number of parallel compute units available to the + device. The minimum value is 1. """ cdef uint32_t max_compute_units = 0 max_compute_units = DPCTLDevice_GetMaxComputeUnits(self._device_ref) @@ -601,7 +604,9 @@ cdef class SyclDevice(_SyclDevice): """ Returns true if the device supports independent forward progress of sub-groups with respect to other sub-groups in the same work-group. """ - return DPCTLDevice_GetSubGroupIndependentForwardProgress(self._device_ref) + return DPCTLDevice_GetSubGroupIndependentForwardProgress( + self._device_ref + ) @property def preferred_vector_width_char(self): @@ -675,17 +680,28 @@ cdef class SyclDevice(_SyclDevice): return "SyclDevice" def __repr__(self): - return ("".format(hex(id(self))) ) + return ( + "".format(hex(id(self))) + ) cdef list create_sub_devices_equally(self, size_t count): - """ Returns a vector of sub devices partitioned from this SYCL device - based on the count parameter. The returned - vector contains as many sub devices as can be created such that each sub - device contains count compute units. If the device’s total number of compute - units is not evenly divided by count, then the remaining compute units are - not included in any of the sub devices. + """ Returns a list of sub-devices partitioned from this SYCL device + based on the ``count`` parameter. + + The returned list contains as many sub-devices as can be created + such that each sub-device contains count compute units. If the + device’s total number of compute units is not evenly divided by + count, then the remaining compute units are not included in any of + the sub-devices. 
""" cdef DPCTLDeviceVectorRef DVRef = NULL DVRef = DPCTLDevice_CreateSubDevicesEqually(self._device_ref, count) @@ -694,33 +710,41 @@ cdef class SyclDevice(_SyclDevice): return _get_devices(DVRef) cdef list create_sub_devices_by_counts(self, object counts): - """ Returns a vector of sub devices - partitioned from this SYCL device based on the counts parameter. For each - non-zero value M in the counts vector, a sub device with M compute units - is created. + """ Returns a list of sub-devices partitioned from this SYCL device + based on the ``counts`` parameter. + + For each non-zero value ``M`` in the counts vector, a sub-device + with ``M`` compute units is created. """ cdef int ncounts = len(counts) cdef size_t *counts_buff = NULL cdef DPCTLDeviceVectorRef DVRef = NULL cdef int i - if (ncounts == 0): - raise TypeError("Non-empty object representing list of counts is expected.") + if ncounts == 0: + raise TypeError( + "Non-empty object representing list of counts is expected." + ) counts_buff = malloc(( ncounts) * sizeof(size_t)) - if (counts_buff is NULL): - raise MemoryError("Allocation of counts array of size {} failed.".format(ncounts)) + if counts_buff is NULL: + raise MemoryError( + "Allocation of counts array of size {} failed.".format(ncounts) + ) for i in range(ncounts): counts_buff[i] = counts[i] - DVRef = DPCTLDevice_CreateSubDevicesByCounts(self._device_ref, counts_buff, ncounts) + DVRef = DPCTLDevice_CreateSubDevicesByCounts( + self._device_ref, counts_buff, ncounts + ) free(counts_buff) if DVRef is NULL: raise SubDeviceCreationError("Sub-devices were not created.") return _get_devices(DVRef) - cdef list create_sub_devices_by_affinity(self, _partition_affinity_domain_type domain): - """ Returns a vector of sub devices - partitioned from this SYCL device by affinity domain based on the domain - parameter. 
+ cdef list create_sub_devices_by_affinity( + self, _partition_affinity_domain_type domain + ): + """ Returns a list of sub-devices partitioned from this SYCL device by + affinity domain based on the ``domain`` parameter. """ cdef DPCTLDeviceVectorRef DVRef = NULL DVRef = DPCTLDevice_CreateSubDevicesByAffinity(self._device_ref, domain) @@ -729,11 +753,15 @@ cdef class SyclDevice(_SyclDevice): return _get_devices(DVRef) def create_sub_devices(self, **kwargs): - if not kwargs.has_key('partition'): - raise TypeError("create_sub_devices(partition=parition_spec) is expected.") - partition = kwargs.pop('partition') - if (kwargs): - raise TypeError("create_sub_devices(partition=parition_spec) is expected.") + if "partition" not in kwargs: + raise TypeError( + "create_sub_devices(partition=parition_spec) is expected." + ) + partition = kwargs.pop("partition") + if kwargs: + raise TypeError( + "create_sub_devices(partition=parition_spec) is expected." + ) if isinstance(partition, int) and partition > 0: return self.create_sub_devices_equally(partition) elif isinstance(partition, str): @@ -750,19 +778,28 @@ cdef class SyclDevice(_SyclDevice): elif partition == "L1_cache": domain_type = _partition_affinity_domain_type._L1_cache elif partition == "next_partitionable": - domain_type = _partition_affinity_domain_type._next_partitionable + domain_type = ( + _partition_affinity_domain_type._next_partitionable + ) else: - raise TypeError("Partition affinity domain {} is not understood.".format(partition)) + raise TypeError( + "Partition affinity domain {} is not understood.".format( + partition + ) + ) return self.create_sub_devices_by_affinity(domain_type) - elif (isinstance(partition, collections.abc.Sized) and - isinstance(partition, collections.abc.Iterable)): + elif isinstance(partition, collections.abc.Sized) and isinstance( + partition, collections.abc.Iterable + ): return self.create_sub_devices_by_counts(partition) else: try: partition = int(partition) return 
self.create_sub_devices_equally(partition) except Exception as e: - raise TypeError("Unsupported type of sub-device argument") from e + raise TypeError( + "Unsupported type of sub-device argument" + ) from e @property def parent_device(self): @@ -788,8 +825,8 @@ cdef class SyclDevice(_SyclDevice): @property def filter_string(self): - """ For a parent device returns a tuple (backend, device_kind, relative_id). - Raises an exception for sub-devices. + """ For a parent device, returns a ``tuple (backend, device_kind, + relative_id)``. Raises an exception for sub-devices. """ cdef DPCTLSyclDeviceRef pDRef = NULL cdef DPCTLSyclBackendType BTy diff --git a/dpctl/_sycl_device_factory.pxd b/dpctl/_sycl_device_factory.pxd index 147e944832..d267f4a55f 100644 --- a/dpctl/_sycl_device_factory.pxd +++ b/dpctl/_sycl_device_factory.pxd @@ -23,8 +23,10 @@ specific backend or device_type. """ from libcpp cimport bool as cpp_bool + from ._sycl_device cimport SyclDevice + cpdef SyclDevice select_accelerator_device() cpdef SyclDevice select_cpu_device() cpdef SyclDevice select_default_device() diff --git a/dpctl/_sycl_device_factory.pyx b/dpctl/_sycl_device_factory.pyx index 8cb157836f..8c7fde2495 100644 --- a/dpctl/_sycl_device_factory.pyx +++ b/dpctl/_sycl_device_factory.pyx @@ -25,9 +25,7 @@ backend_type combination. """ -from ._backend cimport ( - _backend_type, - _device_type, +from ._backend cimport ( # noqa: E211 DPCTLAcceleratorSelector_Create, DPCTLCPUSelector_Create, DPCTLDefaultSelector_Create, @@ -35,18 +33,22 @@ from ._backend cimport ( DPCTLDeviceMgr_GetDevices, DPCTLDeviceMgr_GetNumDevices, DPCTLDeviceSelector_Delete, - DPCTLDeviceVectorRef, DPCTLDeviceVector_Delete, DPCTLDeviceVector_GetAt, DPCTLDeviceVector_Size, + DPCTLDeviceVectorRef, DPCTLGPUSelector_Create, DPCTLHostSelector_Create, DPCTLSyclBackendType, DPCTLSyclDeviceRef, DPCTLSyclDeviceSelectorRef, DPCTLSyclDeviceType, + _backend_type, + _device_type, ) -from . 
import backend_type, device_type as device_type_t + +from .enum_types import backend_type +from .enum_types import device_type as device_type_t __all__ = [ "get_devices", @@ -159,7 +161,7 @@ cpdef list get_devices(backend=backend_type.all, device_type=device_type_t.all): else: raise TypeError( "backend should be specified as a str or an " - "enum_types.backend_type" + "``enum_types.backend_type``." ) if isinstance(device_type, str): @@ -169,7 +171,7 @@ cpdef list get_devices(backend=backend_type.all, device_type=device_type_t.all): else: raise TypeError( "device type should be specified as a str or an " - "enum_types.device_type" + "``enum_types.device_type``." ) DVRef = DPCTLDeviceMgr_GetDevices(BTy | DTy) @@ -192,8 +194,8 @@ cpdef int get_num_devices( BTy = _enum_to_dpctl_sycl_backend_ty(backend) else: raise TypeError( - "backend should be specified as a str or an " - "enum_types.backend_type" + "backend should be specified as a ``str`` or an " + "``enum_types.backend_type``" ) if isinstance(device_type, str): @@ -202,8 +204,8 @@ cpdef int get_num_devices( DTy = _enum_to_dpctl_sycl_device_ty(device_type) else: raise TypeError( - "device type should be specified as a str or an " - "enum_types.device_type" + "device type should be specified as a ``str`` or an " + "``enum_types.device_type``" ) num_devices = DPCTLDeviceMgr_GetNumDevices(BTy | DTy) @@ -212,42 +214,63 @@ cpdef int get_num_devices( cpdef cpp_bool has_cpu_devices(): - """ Returns: True if `sycl::device_type::cpu` devices are present, False otherwise + """ A helper function to check if there are any SYCL CPU devices available. + + Returns: + bool: ``True`` if ``sycl::device_type::cpu`` devices are present, + ``False`` otherwise. 
""" cdef int num_cpu_dev = DPCTLDeviceMgr_GetNumDevices(_device_type._CPU) return num_cpu_dev cpdef cpp_bool has_gpu_devices(): - """ Returns: True if `sycl::device_type::gpu` devices are present, False otherwise + """ A helper function to check if there are any SYCL GPU devices available. + + Returns: + bool: ``True`` if ``sycl::device_type::gpu`` devices are present, + ``False`` otherwise. """ cdef int num_gpu_dev = DPCTLDeviceMgr_GetNumDevices(_device_type._GPU) return num_gpu_dev cpdef cpp_bool has_accelerator_devices(): - """ Returns: True if `sycl::device_type::accelerator` devices are present, False otherwise + """ A helper function to check if there are any SYCL Accelerator devices + available. + + Returns: + bool: ``True`` if ``sycl::device_type::accelerator`` devices are + present, ``False`` otherwise. """ - cdef int num_accelerator_dev = DPCTLDeviceMgr_GetNumDevices(_device_type._ACCELERATOR) + cdef int num_accelerator_dev = DPCTLDeviceMgr_GetNumDevices( + _device_type._ACCELERATOR + ) return num_accelerator_dev cpdef cpp_bool has_host_device(): - """ Returns: True if `sycl::device_type::host` devices are present, False otherwise + """ A helper function to check if there are any SYCL Host devices available. + + Returns: + bool: ``True`` if ``sycl::device_type::host`` devices are present, + ``False`` otherwise. """ - cdef int num_host_dev = DPCTLDeviceMgr_GetNumDevices(_device_type._HOST_DEVICE) + cdef int num_host_dev = DPCTLDeviceMgr_GetNumDevices( + _device_type._HOST_DEVICE + ) return num_host_dev cpdef SyclDevice select_accelerator_device(): - """ A wrapper for SYCL's `accelerator_selector` device_selector class. + """ A wrapper for SYCL's ``accelerator_selector`` class. Returns: - A new SyclDevice object containing the SYCL device returned by the - `accelerator_selector`. + A new class:`dpctl.SyclDevice` object containing the SYCL device + returned by the ``accelerator_selector``. 
Raises: - A ValueError is raised if the SYCL `accelerator_selector` is unable to - select a device. + A ``ValueError`` is raised if the SYCL ``accelerator_selector`` is + unable to select a device. """ cdef DPCTLSyclDeviceSelectorRef DSRef = DPCTLAcceleratorSelector_Create() cdef DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef) @@ -260,14 +283,14 @@ cpdef SyclDevice select_accelerator_device(): cpdef SyclDevice select_cpu_device(): - """ A wrapper for SYCL's `cpu_selector` device_selector class. + """ A wrapper for SYCL's ``cpu_selector`` class. Returns: - A new SyclDevice object containing the SYCL device returned by the - `cpu_selector`. + A new class:`dpctl.SyclDevice` object containing the SYCL device + returned by the ``cpu_selector``. Raises: - A ValueError is raised if the SYCL `cpu_seector` is unable to select a - device. + A ``ValueError`` is raised if the SYCL ``cpu_selector`` is unable to + select a device. """ cdef DPCTLSyclDeviceSelectorRef DSRef = DPCTLCPUSelector_Create() cdef DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef) @@ -280,13 +303,13 @@ cpdef SyclDevice select_cpu_device(): cpdef SyclDevice select_default_device(): - """ A wrapper for SYCL's `default_selector` device_selector class. + """ A wrapper for SYCL's ``default_selector`` class. Returns: - A new SyclDevice object containing the SYCL device returned by the - `default_selector`. + A new class:`dpctl.SyclDevice` object containing the SYCL device + returned by the ``default_selector``. Raises: - A ValueError is raised if the SYCL `default_seector` is unable to + A ``ValueError`` is raised if the SYCL ``default_selector`` is unable to select a device. """ cdef DPCTLSyclDeviceSelectorRef DSRef = DPCTLDefaultSelector_Create() @@ -300,14 +323,14 @@ cpdef SyclDevice select_default_device(): cpdef SyclDevice select_gpu_device(): - """ A wrapper for SYCL's `gpu_selector` device_selector class. + """ A wrapper for SYCL's ``gpu_selector`` class. 
Returns: - A new SyclDevice object containing the SYCL device returned by the - `gpu_selector`. + A new class:`dpctl.SyclDevice` object containing the SYCL device + returned by the ``gpu_selector``. Raises: - A ValueError is raised if the SYCL `gpu_seector` is unable to select a - device. + A ``ValueError`` is raised if the SYCL `gpu_selector` is unable to + select a device. """ cdef DPCTLSyclDeviceSelectorRef DSRef = DPCTLGPUSelector_Create() cdef DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef) @@ -320,14 +343,14 @@ cpdef SyclDevice select_gpu_device(): cpdef SyclDevice select_host_device(): - """ A wrapper for SYCL's `host_selector` device_selector class. + """ A wrapper for SYCL's ``host_selector`` class. Returns: - A new SyclDevice object containing the SYCL device returned by the - `host_selector`. + A new class:`dpctl.SyclDevice` object containing the SYCL device + returned by the ``host_selector``. Raises: - A ValueError is raised if the SYCL `host_seector` is unable to select a - device. + A ``ValueError`` is raised if the SYCL ``host_selector`` is unable to + select a device. 
""" cdef DPCTLSyclDeviceSelectorRef DSRef = DPCTLHostSelector_Create() cdef DPCTLSyclDeviceRef DRef = DPCTLDevice_CreateFromSelector(DSRef) diff --git a/dpctl/_sycl_event.pyx b/dpctl/_sycl_event.pyx index 79181749d3..d0a062fbc3 100644 --- a/dpctl/_sycl_event.pyx +++ b/dpctl/_sycl_event.pyx @@ -21,8 +21,10 @@ """ from __future__ import print_function + import logging -from ._backend cimport DPCTLSyclEventRef, DPCTLEvent_Delete, DPCTLEvent_Wait + +from ._backend cimport DPCTLEvent_Delete, DPCTLEvent_Wait, DPCTLSyclEventRef __all__ = [ "SyclEvent", @@ -36,25 +38,25 @@ cdef class SyclEvent: """ @staticmethod - cdef SyclEvent _create (DPCTLSyclEventRef eref, list args): + cdef SyclEvent _create(DPCTLSyclEventRef eref, list args): cdef SyclEvent ret = SyclEvent.__new__(SyclEvent) ret._event_ref = eref ret._args = args return ret - def __dealloc__ (self): + def __dealloc__(self): self.wait() DPCTLEvent_Delete(self._event_ref) - cdef DPCTLSyclEventRef get_event_ref (self): + cdef DPCTLSyclEventRef get_event_ref(self): """ Returns the DPCTLSyclEventRef pointer for this class. """ return self._event_ref - cpdef void wait (self): + cpdef void wait(self): DPCTLEvent_Wait(self._event_ref) - def addressof_ref (self): + def addressof_ref(self): """ Returns the address of the C API DPCTLSyclEventRef pointer as a size_t. diff --git a/dpctl/_sycl_platform.pxd b/dpctl/_sycl_platform.pxd index 04b9d56c13..07b0cc2216 100644 --- a/dpctl/_sycl_platform.pxd +++ b/dpctl/_sycl_platform.pxd @@ -21,10 +21,7 @@ SYCL platform-related helper functions. 
""" -from ._backend cimport ( - DPCTLSyclPlatformRef, - DPCTLSyclDeviceSelectorRef, -) +from ._backend cimport DPCTLSyclDeviceSelectorRef, DPCTLSyclPlatformRef cdef class _SyclPlatform: diff --git a/dpctl/_sycl_platform.pyx b/dpctl/_sycl_platform.pyx index aca3faacef..0a8d627841 100644 --- a/dpctl/_sycl_platform.pyx +++ b/dpctl/_sycl_platform.pyx @@ -21,11 +21,11 @@ """ from __future__ import print_function -from ._backend cimport( - _backend_type, + +from ._backend cimport ( # noqa: E211 DPCTLCString_Delete, - DPCTLFilterSelector_Create, DPCTLDeviceSelector_Delete, + DPCTLFilterSelector_Create, DPCTLPlatform_Copy, DPCTLPlatform_Create, DPCTLPlatform_CreateFromSelector, @@ -43,8 +43,10 @@ from ._backend cimport( DPCTLSyclBackendType, DPCTLSyclDeviceSelectorRef, DPCTLSyclPlatformRef, + _backend_type, ) -from . import backend_type + +from .enum_types import backend_type __all__ = [ "get_platforms", @@ -64,10 +66,11 @@ cdef class _SyclPlatform: cdef class SyclPlatform(_SyclPlatform): - """ Python class representing cl::sycl::platform class. + """ Python class representing ``cl::sycl::platform`` class. SyclPlatform() - create platform selected by sycl::default_selector - SyclPlatform(filter_selector) - create platform selected by filter selector + SyclPlatform(filter_selector) - create platform selected by filter + selector """ @staticmethod cdef void _init_helper(_SyclPlatform platform, DPCTLSyclPlatformRef PRef): @@ -79,7 +82,7 @@ cdef class SyclPlatform(_SyclPlatform): @staticmethod cdef SyclPlatform _create(DPCTLSyclPlatformRef pref): """ - This function calls DPCTLPlatform_Delete(pref). + This function calls ``DPCTLPlatform_Delete(pref)``. The user of this function must pass a copy to keep the pref argument alive. @@ -113,8 +116,8 @@ cdef class SyclPlatform(_SyclPlatform): SyclPlatform._init_helper(self, PRef) return 0 - cdef DPCTLSyclPlatformRef get_platform_ref (self): - """ Returns the DPCTLSyclPlatformRef pointer for this class. 
+ cdef DPCTLSyclPlatformRef get_platform_ref(self): + """ Returns the ``DPCTLSyclPlatformRef`` pointer for this class. """ return self._platform_ref @@ -123,9 +126,17 @@ cdef class SyclPlatform(_SyclPlatform): return "SyclPlatform" def __repr__(self): - return ("".format(hex(id(self))) ) + return ( + "".format(hex(id(self))) + ) def __cinit__(self, arg=None): if type(arg) is unicode: diff --git a/dpctl/_sycl_queue.pxd b/dpctl/_sycl_queue.pxd index 2b078ad4a1..0798777f78 100644 --- a/dpctl/_sycl_queue.pxd +++ b/dpctl/_sycl_queue.pxd @@ -20,16 +20,14 @@ """ This file declares the SyclQueue extension type. """ -from ._backend cimport ( - DPCTLSyclDeviceRef, - DPCTLKernelArgType, - DPCTLSyclQueueRef, -) +from libcpp cimport bool as cpp_bool + +from ._backend cimport DPCTLKernelArgType, DPCTLSyclDeviceRef, DPCTLSyclQueueRef from ._sycl_context cimport SyclContext -from ._sycl_event cimport SyclEvent from ._sycl_device cimport SyclDevice +from ._sycl_event cimport SyclEvent from .program._program cimport SyclKernel -from libcpp cimport bool as cpp_bool + cdef void default_async_error_handler(int) nogil except * diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index 63358efa48..a0b3f5c347 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -21,18 +21,16 @@ """ from __future__ import print_function -from ._backend cimport ( - _arg_data_type, - _backend_type, - _queue_property_type, + +from ._backend cimport ( # noqa: E211 DPCTLContext_Create, DPCTLContext_Delete, DPCTLDefaultSelector_Create, + DPCTLDevice_Copy, DPCTLDevice_CreateFromSelector, + DPCTLDevice_Delete, DPCTLDeviceMgr_GetCachedContext, DPCTLDeviceSelector_Delete, - DPCTLDevice_Copy, - DPCTLDevice_Delete, DPCTLFilterSelector_Create, DPCTLQueue_AreEq, DPCTLQueue_Copy, @@ -41,26 +39,32 @@ from ._backend cimport ( DPCTLQueue_GetBackend, DPCTLQueue_GetContext, DPCTLQueue_GetDevice, + DPCTLQueue_IsInOrder, DPCTLQueue_MemAdvise, DPCTLQueue_Memcpy, DPCTLQueue_Prefetch, DPCTLQueue_SubmitNDRange, 
DPCTLQueue_SubmitRange, DPCTLQueue_Wait, - DPCTLQueue_IsInOrder, DPCTLSyclBackendType, DPCTLSyclContextRef, DPCTLSyclDeviceSelectorRef, DPCTLSyclEventRef, + _arg_data_type, + _backend_type, + _queue_property_type, error_handler_callback, ) from .memory._memory cimport _Memory -from . import backend_type + import ctypes -from libc.stdlib cimport malloc, free + +from .enum_types import backend_type + from cpython cimport pycapsule -import logging +from libc.stdlib cimport free, malloc +import logging __all__ = [ "SyclQueue", @@ -132,18 +136,25 @@ cdef int _parse_queue_properties(object prop) except *: elif (p == "default"): res = res | _queue_property_type._DEFAULT_PROPERTY else: - raise ValueError(("queue property '{}' is not understood, " - "expecting 'in_order', 'enable_profiling', or 'default'" - ).format(prop)) + raise ValueError( + ( + "queue property '{}' is not understood, " + "expecting 'in_order', 'enable_profiling', or 'default'" + ).format(prop) + ) else: - raise ValueError("queue property '{}' is not understood.".format(prop)) + raise ValueError( + "queue property '{}' is not understood.".format(prop) + ) return res cdef void _queue_capsule_deleter(object o): cdef DPCTLSyclQueueRef QRef = NULL if pycapsule.PyCapsule_IsValid(o, "SyclQueueRef"): - QRef = pycapsule.PyCapsule_GetPointer(o, "SyclQueueRef") + QRef = pycapsule.PyCapsule_GetPointer( + o, "SyclQueueRef" + ) DPCTLQueue_Delete(QRef) @@ -212,7 +223,7 @@ cdef class SyclQueue(_SyclQueue): # Create a CPU device using the opencl driver cpu_d = dpctl.SyclDevice("opencl:cpu") - # Partition the CPU device into sub-devices, each with two cores. + # Partition the CPU device into sub-devices with two cores each. sub_devices = cpu_d.create_sub_devices(partition=2) # Create a context common to all the sub-devices. ctx = dpctl.SyclContext(sub_devices) @@ -244,10 +255,11 @@ cdef class SyclQueue(_SyclQueue): "enable_profiling", or a tuple containing these. 
Raises: - SyclQueueCreationError: If the :class:`dpctl.SyclQueue` object creation failed. - TypeError: In case of incorrect arguments given to constructors, unexpected types - of input arguments, or in the case the input capsule contained a null - pointer or could not be renamed. + SyclQueueCreationError: If the :class:`dpctl.SyclQueue` object + creation failed. + TypeError: In case of incorrect arguments given to constructors, + unexpected types of input arguments, or in the case the input + capsule contained a null pointer or could not be renamed. """ def __cinit__(self, *args, **kwargs): @@ -326,7 +338,10 @@ cdef class SyclQueue(_SyclQueue): "SYCL Queue failed to be created from '{}'.".format(arg) ) elif status == -5: - raise TypeError("Input capsule {} contains a null pointer or could not be renamed".format(arg)) + raise TypeError( + "Input capsule {} contains a null pointer or could not " + "be renamed".format(arg) + ) cdef int _init_queue_from__SyclQueue(self, _SyclQueue other): """ Copy data container _SyclQueue fields over. @@ -378,7 +393,7 @@ cdef class SyclQueue(_SyclQueue): self._device = _dev self._context = _ctxt self._queue_ref = QRef - return 0 # normal return + return 0 # normal return cdef int _init_queue_from_filter_string(self, const char *c_str, int props): """ @@ -399,12 +414,12 @@ cdef class SyclQueue(_SyclQueue): DSRef = DPCTLFilterSelector_Create(c_str) if DSRef is NULL: - ret = -1 # Filter selector failed to be created + ret = -1 # Filter selector failed to be created else: DRef = DPCTLDevice_CreateFromSelector(DSRef) DPCTLDeviceSelector_Delete(DSRef) if (DRef is NULL): - ret = -2 # Device could not be created + ret = -2 # Device could not be created else: ret = self._init_queue_from_DPCTLSyclDeviceRef(DRef, props) return ret @@ -415,7 +430,7 @@ cdef class SyclQueue(_SyclQueue): # is garbage collected. 
DRef = DPCTLDevice_Copy(dev.get_device_ref()) if (DRef is NULL): - return -2 # Device could not be created + return -2 # Device could not be created else: return self._init_queue_from_DPCTLSyclDeviceRef(DRef, props) @@ -427,7 +442,7 @@ cdef class SyclQueue(_SyclQueue): DRef = DPCTLDevice_CreateFromSelector(DSRef) DPCTLDeviceSelector_Delete(DSRef) if (DRef is NULL): - ret = -2 # Device could not be created + ret = -2 # Device could not be created else: ret = self._init_queue_from_DPCTLSyclDeviceRef(DRef, props) return ret @@ -435,8 +450,6 @@ cdef class SyclQueue(_SyclQueue): cdef int _init_queue_from_context_and_device( self, SyclContext ctxt, SyclDevice dev, int props ): - """ - """ cdef DPCTLSyclContextRef CRef = NULL cdef DPCTLSyclDeviceRef DRef = NULL cdef DPCTLSyclQueueRef QRef = NULL @@ -453,13 +466,13 @@ cdef class SyclQueue(_SyclQueue): self._device = dev self._context = ctxt self._queue_ref = QRef - return 0 # normal return + return 0 # normal return cdef int _init_queue_from_capsule(self, object cap): """ For named PyCapsule with name SyclQueueRef, which carries pointer to - sycl::queue object, interpreted as DPCTLSyclQueueRef, creates corresponding - SyclQueue. + sycl::queue object, interpreted as DPCTLSyclQueueRef, creates + corresponding SyclQueue. 
""" cdef DPCTLSyclContextRef CRef = NULL cdef DPCTLSyclDeviceRef DRef = NULL @@ -467,7 +480,9 @@ cdef class SyclQueue(_SyclQueue): cdef DPCTLSyclQueueRef QRef_copy = NULL cdef int ret = 0 if pycapsule.PyCapsule_IsValid(cap, "SyclQueueRef"): - QRef = pycapsule.PyCapsule_GetPointer(cap, "SyclQueueRef") + QRef = pycapsule.PyCapsule_GetPointer( + cap, "SyclQueueRef" + ) if (QRef is NULL): return -5 ret = pycapsule.PyCapsule_SetName(cap, "used_SyclQueueRef") @@ -475,7 +490,7 @@ cdef class SyclQueue(_SyclQueue): return -5 QRef_copy = DPCTLQueue_Copy(QRef) if (QRef_copy is NULL): - return -6 + return -6 CRef = DPCTLQueue_GetContext(QRef_copy) if (CRef is NULL): DPCTLQueue_Delete(QRef_copy) @@ -491,8 +506,8 @@ cdef class SyclQueue(_SyclQueue): return 0 else: # __cinit__ checks that capsule is valid, so one can be here only - # if call to `_init_queue_from_capsule` was made outside of __cinit__ - # and the capsule was not checked to be valid + # if call to `_init_queue_from_capsule` was made outside of + # __cinit__ and the capsule was not checked to be valid. return -128 @staticmethod @@ -620,7 +635,7 @@ cdef class SyclQueue(_SyclQueue): else: return False - def get_sycl_backend (self): + def get_sycl_backend(self): """ Returns the Sycl backend associated with the queue. """ cdef DPCTLSyclBackendType BE = DPCTLQueue_GetBackend(self._queue_ref) @@ -654,11 +669,12 @@ cdef class SyclQueue(_SyclQueue): def addressof_ref(self): """ - Returns the address of the C API DPCTLSyclQueueRef pointer as a size_t. + Returns the address of the C API DPCTLSyclQueueRef pointer as a + ``size_t``. Returns: - The address of the DPCTLSyclQueueRef object used to create this - SyclQueue cast to a size_t. + The address of the ``DPCTLSyclQueueRef`` object used to create this + :class:`dpctl.SyclQueue` cast to a ``size_t``. 
""" return int(self._queue_ref) @@ -667,8 +683,8 @@ cdef class SyclQueue(_SyclQueue): SyclKernel kernel, list args, list gS, - list lS = None, - list dEvents = None + list lS=None, + list dEvents=None ): cdef void **kargs = NULL cdef DPCTLKernelArgType *kargty = NULL @@ -685,13 +701,17 @@ cdef class SyclQueue(_SyclQueue): kargs = malloc(len(args) * sizeof(void*)) if not kargs: raise MemoryError() - kargty = malloc(len(args)*sizeof(DPCTLKernelArgType)) + kargty = ( + malloc(len(args)*sizeof(DPCTLKernelArgType)) + ) if not kargty: free(kargs) raise MemoryError() # Create the array of dependent events if any if dEvents is not None and nDE > 0: - depEvents = malloc(nDE*sizeof(DPCTLSyclEventRef)) + depEvents = ( + malloc(nDE*sizeof(DPCTLSyclEventRef)) + ) if not depEvents: free(kargs) free(kargty) @@ -730,7 +750,7 @@ cdef class SyclQueue(_SyclQueue): nDE ) else: - ret = self._populate_range (gRange, gS, nGS) + ret = self._populate_range(gRange, gS, nGS) if ret == -1: free(kargs) free(kargty) @@ -739,7 +759,7 @@ cdef class SyclQueue(_SyclQueue): "Range with ", nGS, " not allowed. Range can only have " "between one and three dimensions." 
) - ret = self._populate_range (lRange, lS, nLS) + ret = self._populate_range(lRange, lS, nLS) if ret == -1: free(kargs) free(kargty) @@ -799,30 +819,30 @@ cdef class SyclQueue(_SyclQueue): DPCTLQueue_Memcpy(self._queue_ref, c_dest, c_src, count) cpdef prefetch(self, mem, size_t count=0): - cdef void *ptr + cdef void *ptr - if isinstance(mem, _Memory): - ptr = (<_Memory>mem).memory_ptr - else: - raise TypeError("Parameter `mem` should have type _Memory") + if isinstance(mem, _Memory): + ptr = (<_Memory>mem).memory_ptr + else: + raise TypeError("Parameter `mem` should have type _Memory") - if (count <=0 or count > self.nbytes): - count = self.nbytes + if (count <=0 or count > self.nbytes): + count = self.nbytes - DPCTLQueue_Prefetch(self._queue_ref, ptr, count) + DPCTLQueue_Prefetch(self._queue_ref, ptr, count) cpdef mem_advise(self, mem, size_t count, int advice): - cdef void *ptr + cdef void *ptr - if isinstance(mem, _Memory): - ptr = (<_Memory>mem).memory_ptr - else: - raise TypeError("Parameter `mem` should have type _Memory") + if isinstance(mem, _Memory): + ptr = (<_Memory>mem).memory_ptr + else: + raise TypeError("Parameter `mem` should have type _Memory") - if (count <=0 or count > self.nbytes): - count = self.nbytes + if (count <=0 or count > self.nbytes): + count = self.nbytes - DPCTLQueue_MemAdvise(self._queue_ref, ptr, count, advice) + DPCTLQueue_MemAdvise(self._queue_ref, ptr, count, advice) @property def is_in_order(self): @@ -836,7 +856,11 @@ cdef class SyclQueue(_SyclQueue): def __repr__(self): cdef cpp_bool in_order = DPCTLQueue_IsInOrder(self._queue_ref) if in_order: - return "".format(hex(id(self))) + return ( + "".format(hex(id(self))) + ) else: return "".format(hex(id(self))) @@ -845,4 +869,6 @@ cdef class SyclQueue(_SyclQueue): QRef = DPCTLQueue_Copy(self._queue_ref) if (QRef is NULL): raise ValueError("SyclQueue copy failed.") - return pycapsule.PyCapsule_New(QRef, "SyclQueueRef", &_queue_capsule_deleter) + return pycapsule.PyCapsule_New( + 
QRef, "SyclQueueRef", &_queue_capsule_deleter + ) diff --git a/dpctl/_sycl_queue_manager.pyx b/dpctl/_sycl_queue_manager.pyx index 7e7454ccdd..6861757475 100644 --- a/dpctl/_sycl_queue_manager.pyx +++ b/dpctl/_sycl_queue_manager.pyx @@ -18,22 +18,25 @@ # cython: language_level=3 from __future__ import print_function + import logging -from . import backend_type, device_type -from ._backend cimport( - _backend_type, - _device_type, +from contextlib import contextmanager + +from .enum_types import backend_type, device_type + +from ._backend cimport ( # noqa: E211 DPCTLQueueMgr_GetCurrentQueue, + DPCTLQueueMgr_GetQueueStackSize, DPCTLQueueMgr_GlobalQueueIsCurrent, - DPCTLQueueMgr_PushQueue, DPCTLQueueMgr_PopQueue, + DPCTLQueueMgr_PushQueue, DPCTLQueueMgr_SetGlobalQueue, DPCTLSyclQueueRef, - DPCTLQueueMgr_GetQueueStackSize, + _backend_type, + _device_type, ) from ._sycl_context cimport SyclContext - __all__ = [ "device_context", "get_current_backend", @@ -167,8 +170,8 @@ _mgr = _SyclQueueManager() # Global bound functions get_num_activated_queues = _mgr.get_num_activated_queues -set_global_queue = _mgr.set_global_queue -is_in_device_context = _mgr.is_in_device_context +set_global_queue = _mgr.set_global_queue +is_in_device_context = _mgr.is_in_device_context cpdef SyclQueue get_current_queue(): @@ -209,9 +212,6 @@ cpdef get_current_backend(): return _mgr.get_current_backend() -from contextlib import contextmanager - - @contextmanager def device_context(arg): """ diff --git a/dpctl/enum_types.py b/dpctl/enum_types.py index ed62512dce..2c2bd4edca 100644 --- a/dpctl/enum_types.py +++ b/dpctl/enum_types.py @@ -22,7 +22,6 @@ """ from enum import Enum, auto - __all__ = [ "device_type", "backend_type", diff --git a/dpctl/memory/__init__.py b/dpctl/memory/__init__.py index bcc5591f8b..6fcbeb372e 100644 --- a/dpctl/memory/__init__.py +++ b/dpctl/memory/__init__.py @@ -30,7 +30,6 @@ `memoryview`, or `array.array` classes. 
""" -from ._memory import MemoryUSMShared, MemoryUSMDevice, MemoryUSMHost -from ._memory import __all__ as _memory__all__ +from ._memory import MemoryUSMDevice, MemoryUSMHost, MemoryUSMShared -__all__ = _memory__all__ +__all__ = ["MemoryUSMDevice", "MemoryUSMHost", "MemoryUSMShared"] diff --git a/dpctl/memory/_memory.pxd b/dpctl/memory/_memory.pxd index 2723d3435a..1a2a573e66 100644 --- a/dpctl/memory/_memory.pxd +++ b/dpctl/memory/_memory.pxd @@ -22,7 +22,7 @@ in dpctl.memory._memory.pyx. """ -from .._backend cimport DPCTLSyclUSMRef, DPCTLSyclQueueRef +from .._backend cimport DPCTLSyclQueueRef, DPCTLSyclUSMRef from .._sycl_context cimport SyclContext from .._sycl_device cimport SyclDevice from .._sycl_queue cimport SyclQueue diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index 4bf5f23914..44211d155e 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -24,18 +24,36 @@ use in other Python modules. import dpctl -from dpctl._backend cimport * + +from cpython cimport Py_buffer, pycapsule +from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize + +from dpctl._backend cimport ( # noqa: E211 + DPCTLaligned_alloc_device, + DPCTLaligned_alloc_host, + DPCTLaligned_alloc_shared, + DPCTLfree_with_queue, + DPCTLmalloc_device, + DPCTLmalloc_host, + DPCTLmalloc_shared, + DPCTLQueue_Copy, + DPCTLQueue_Create, + DPCTLQueue_Delete, + DPCTLQueue_Memcpy, + DPCTLSyclContextRef, + DPCTLSyclDeviceRef, + DPCTLUSM_GetPointerDevice, + DPCTLUSM_GetPointerType, +) + from .._sycl_context cimport SyclContext from .._sycl_device cimport SyclDevice from .._sycl_queue cimport SyclQueue -from cpython cimport Py_buffer -from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_FromStringAndSize -from cpython cimport pycapsule +import collections +import numbers import numpy as np -import numbers -import collections __all__ = [ "MemoryUSMShared", @@ -73,7 +91,7 @@ cdef void copy_via_host(void *dest_ptr, SyclQueue dest_queue, ) -def _to_memory(unsigned 
char [::1] b, str usm_kind): +def _to_memory(unsigned char[::1] b, str usm_kind): """ Constructs Memory of the same size as the argument and copies data into it""" @@ -116,25 +134,31 @@ cdef class _Memory: if (ptr_type == b"shared"): if alignment > 0: - p = DPCTLaligned_alloc_shared(alignment, nbytes, - queue.get_queue_ref()) + p = DPCTLaligned_alloc_shared( + alignment, nbytes, queue.get_queue_ref() + ) else: p = DPCTLmalloc_shared(nbytes, queue.get_queue_ref()) elif (ptr_type == b"host"): if alignment > 0: - p = DPCTLaligned_alloc_host(alignment, nbytes, - queue.get_queue_ref()) + p = DPCTLaligned_alloc_host( + alignment, nbytes, queue.get_queue_ref() + ) else: p = DPCTLmalloc_host(nbytes, queue.get_queue_ref()) elif (ptr_type == b"device"): if (alignment > 0): - p = DPCTLaligned_alloc_device(alignment, nbytes, - queue.get_queue_ref()) + p = DPCTLaligned_alloc_device( + alignment, nbytes, queue.get_queue_ref() + ) else: p = DPCTLmalloc_device(nbytes, queue.get_queue_ref()) else: - raise RuntimeError("Pointer type is unknown: {}" \ - .format(ptr_type.decode("UTF-8"))) + raise RuntimeError( + "Pointer type is unknown: {}".format( + ptr_type.decode("UTF-8") + ) + ) if (p): self.memory_ptr = p @@ -178,19 +202,20 @@ cdef class _Memory: def __dealloc__(self): if (self.refobj is None and self.memory_ptr): - DPCTLfree_with_queue(self.memory_ptr, - self.queue.get_queue_ref()) + DPCTLfree_with_queue( + self.memory_ptr, self.queue.get_queue_ref() + ) self._cinit_empty() cdef _getbuffer(self, Py_buffer *buffer, int flags): # memory_ptr is Ref which is pointer to SYCL type. For USM it is void*. 
cdef SyclContext ctx = self._context - cdef const char * kind = DPCTLUSM_GetPointerType( - self.memory_ptr, - ctx.get_context_ref()) + cdef const char *kind = DPCTLUSM_GetPointerType( + self.memory_ptr, ctx.get_context_ref() + ) if kind == b'device': raise ValueError('USM Device memory is not host accessible') - buffer.buf = self.memory_ptr + buffer.buf = self.memory_ptr buffer.format = 'B' # byte buffer.internal = NULL # see References buffer.itemsize = 1 @@ -227,8 +252,10 @@ cdef class _Memory: return self.refobj def __repr__(self): - return "" \ + return ( + "" .format(self.nbytes, hex((self.memory_ptr))) + ) def __len__(self): return self.nbytes @@ -246,7 +273,7 @@ cdef class _Memory: def __get__(self): cdef dict iface = { "data": ((self.memory_ptr), - True), # bool(self.writeable)), + True), # bool(self.writeable)), "shape": (self.nbytes,), "strides": None, "typestr": "|u1", @@ -261,25 +288,32 @@ cdef class _Memory: cdef SyclQueue q if syclobj is None: ctx = self._context - kind = DPCTLUSM_GetPointerType(self.memory_ptr, - ctx.get_context_ref()) + kind = DPCTLUSM_GetPointerType( + self.memory_ptr, ctx.get_context_ref() + ) elif isinstance(syclobj, SyclContext): ctx = (syclobj) - kind = DPCTLUSM_GetPointerType(self.memory_ptr, - ctx.get_context_ref()) + kind = DPCTLUSM_GetPointerType( + self.memory_ptr, ctx.get_context_ref() + ) elif isinstance(syclobj, SyclQueue): q = (syclobj) ctx = q.get_sycl_context() - kind = DPCTLUSM_GetPointerType(self.memory_ptr, - ctx.get_context_ref()) + kind = DPCTLUSM_GetPointerType( + self.memory_ptr, ctx.get_context_ref() + ) else: - raise ValueError("syclobj keyword can be either None, " - "or an instance of SyclContext or SyclQueue") + raise ValueError( + "syclobj keyword can be either None, or an instance of " + "SyclContext or SyclQueue" + ) return kind.decode('UTF-8') cpdef copy_to_host(self, obj=None): - """Copy content of instance's memory into memory of - `obj`, or allocate NumPy array of obj is None""" + """ + Copy 
content of instance's memory into memory of ``obj``, or allocate + NumPy array if ``obj`` is ``None``. + """ # Cython does the right thing here cdef unsigned char[::1] host_buf = obj @@ -289,8 +323,10 @@ cdef class _Memory: obj = np.empty((self.nbytes,), dtype="|u1") host_buf = obj elif (len(host_buf) < self.nbytes): - raise ValueError("Destination object is too small to " - "accommodate {} bytes".format(self.nbytes)) + raise ValueError( + "Destination object is too small to accommodate {} bytes" + .format(self.nbytes) + ) # call kernel to copy from DPCTLQueue_Memcpy( self.queue.get_queue_ref(), @@ -309,9 +345,10 @@ cdef class _Memory: cdef Py_ssize_t buf_len = len(host_buf) if (buf_len > self.nbytes): - raise ValueError("Source object is too large to be " - "accommodated in {} bytes buffer".format( - self.nbytes)) + raise ValueError( + "Source object is too large to be accommodated in {} bytes " + "buffer".format(self.nbytes) + ) # call kernel to copy from DPCTLQueue_Memcpy( self.queue.get_queue_ref(), @@ -321,22 +358,27 @@ cdef class _Memory: cpdef copy_from_device(self, object sycl_usm_ary): - """Copy SYCL memory underlying the argument object into - the memory of the instance""" + """ + Copy SYCL memory underlying the argument object into + the memory of the instance + """ cdef _USMBufferData src_buf cdef const char* kind if not hasattr(sycl_usm_ary, '__sycl_usm_array_interface__'): - raise ValueError("Object does not implement " - "`__sycl_usm_array_interface__` protocol") + raise ValueError( + "Object does not implement " + "`__sycl_usm_array_interface__` protocol" + ) sycl_usm_ary_iface = sycl_usm_ary.__sycl_usm_array_interface__ if isinstance(sycl_usm_ary_iface, dict): src_buf = _USMBufferData.from_sycl_usm_ary_iface(sycl_usm_ary_iface) if (src_buf.nbytes > self.nbytes): - raise ValueError("Source object is too large to " - "be accommondated in {} bytes buffer".format( - self.nbytes)) + raise ValueError( + "Source object is too large to " + "be 
accommondated in {} bytes buffer".format(self.nbytes) + ) kind = DPCTLUSM_GetPointerType( src_buf.p, self.queue.get_sycl_context().get_context_ref()) if (kind == b'unknown'): @@ -356,14 +398,16 @@ cdef class _Memory: raise TypeError cpdef bytes tobytes(self): - """Constructs bytes object populated with copy of USM memory""" + """ + Constructs bytes object populated with copy of USM memory. + """ cdef Py_ssize_t nb = self.nbytes cdef bytes b = PyBytes_FromStringAndSize(NULL, nb) # convert bytes to memory view cdef unsigned char* ptr = PyBytes_AS_STRING(b) # string is null terminated cdef unsigned char[::1] mv = (ptr)[:nb] - self.copy_to_host(mv) # output is discarded + self.copy_to_host(mv) # output is discarded return b @staticmethod @@ -373,7 +417,8 @@ cdef class _Memory: given sycl context `ctx` """ cdef DPCTLSyclDeviceRef dref = DPCTLUSM_GetPointerDevice( - p, ctx.get_context_ref()) + p, ctx.get_context_ref() + ) return SyclDevice._create(dref) @@ -381,7 +426,8 @@ cdef class _Memory: cdef bytes get_pointer_type(DPCTLSyclUSMRef p, SyclContext ctx): """Returns USM-type of given pointer `p` in given sycl context `ctx`""" cdef const char * usm_type = DPCTLUSM_GetPointerType( - p, ctx.get_context_ref()) + p, ctx.get_context_ref() + ) return usm_type @@ -418,7 +464,8 @@ cdef class MemoryUSMShared(_Memory): "copy=False. " "Either use copy=True, or use a constructor " "appropriate for " - "type '{}'".format(other, self.get_usm_type())) + "type '{}'".format(other, self.get_usm_type()) + ) def __getbuffer__(self, Py_buffer *buffer, int flags): self._getbuffer(buffer, flags) @@ -445,8 +492,9 @@ cdef class MemoryUSMHost(_Memory): self._cinit_other(other) if (self.get_usm_type() != "host"): if copy: - self._cinit_alloc(0, self.nbytes, - b"host", queue) + self._cinit_alloc( + 0, self.nbytes, b"host", queue + ) self.copy_from_device(other) else: raise ValueError( @@ -455,7 +503,9 @@ cdef class MemoryUSMHost(_Memory): "Zero-copy operation is not possible with copy=False. 
" "Either use copy=True, or use a constructor " "appropriate for type '{}'".format( - other, self.get_usm_type())) + other, self.get_usm_type() + ) + ) def __getbuffer__(self, Py_buffer *buffer, int flags): self._getbuffer(buffer, flags) @@ -482,8 +532,9 @@ cdef class MemoryUSMDevice(_Memory): self._cinit_other(other) if (self.get_usm_type() != "device"): if copy: - self._cinit_alloc(0, self.nbytes, - b"device", queue) + self._cinit_alloc( + 0, self.nbytes, b"device", queue + ) self.copy_from_device(other) else: raise ValueError( @@ -492,4 +543,6 @@ cdef class MemoryUSMDevice(_Memory): "Zero-copy operation is not possible with copy=False. " "Either use copy=True, or use a constructor " "appropriate for type '{}'".format( - other, self.get_usm_type())) + other, self.get_usm_type() + ) + ) diff --git a/dpctl/program/__init__.py b/dpctl/program/__init__.py index 9e79ab9b6a..58a46bb2eb 100644 --- a/dpctl/program/__init__.py +++ b/dpctl/program/__init__.py @@ -21,7 +21,18 @@ source string or SPIR-V binary file. """ -from ._program import * -from ._program import __all__ as _program__all__ +from ._program import ( + SyclKernel, + SyclProgram, + SyclProgramCompilationError, + create_program_from_source, + create_program_from_spirv, +) -__all__ = _program__all__ +__all__ = [ + "create_program_from_source", + "create_program_from_spirv", + "SyclKernel", + "SyclProgram", + "SyclProgramCompilationError", +] diff --git a/dpctl/program/_program.pyx b/dpctl/program/_program.pyx index 2eb2b6182d..9b5601554f 100644 --- a/dpctl/program/_program.pyx +++ b/dpctl/program/_program.pyx @@ -24,9 +24,22 @@ a OpenCL source string or a SPIR-V binary file. 
""" +cimport cython.array -from __future__ import print_function -from dpctl._backend cimport * +from dpctl._backend cimport ( # noqa: E211, E402 + DPCTLCString_Delete, + DPCTLKernel_Delete, + DPCTLKernel_GetFunctionName, + DPCTLKernel_GetNumArgs, + DPCTLProgram_CreateFromOCLSource, + DPCTLProgram_CreateFromSpirv, + DPCTLProgram_Delete, + DPCTLProgram_GetKernel, + DPCTLProgram_HasKernel, + DPCTLSyclContextRef, + DPCTLSyclKernelRef, + DPCTLSyclProgramRef, +) __all__ = [ "create_program_from_source", @@ -37,11 +50,12 @@ __all__ = [ ] cdef class SyclProgramCompilationError(Exception): - """This exception is raised when a SYCL program could not be built from + """This exception is raised when a ``sycl::program`` could not be built from either a SPIR-V binary file or a string source. """ pass + cdef class SyclKernel: """ """ @@ -56,8 +70,8 @@ cdef class SyclKernel: DPCTLKernel_Delete(self._kernel_ref) DPCTLCString_Delete(self._function_name) - def get_function_name (self): - """ Returns the name of the Kernel function. + def get_function_name(self): + """ Returns the name of the ``sycl::kernel`` function. """ return self._function_name.decode() @@ -66,32 +80,33 @@ cdef class SyclKernel: """ return DPCTLKernel_GetNumArgs(self._kernel_ref) - cdef DPCTLSyclKernelRef get_kernel_ref (self): - """ Returns the DPCTLSyclKernelRef pointer for this SyclKernel. + cdef DPCTLSyclKernelRef get_kernel_ref(self): + """ Returns the ``DPCTLSyclKernelRef`` pointer for this SyclKernel. """ return self._kernel_ref def addressof_ref(self): - """ Returns the address of the C API DPCTLSyclKernelRef pointer - as a size_t. + """ Returns the address of the C API ``DPCTLSyclKernelRef`` pointer + as a ``size_t``. Returns: - The address of the DPCTLSyclKernelRef object used to create this - SyclKernel cast to a size_t. + The address of the ``DPCTLSyclKernelRef`` pointer used to create + this :class:`dpctl.SyclKernel` object cast to a ``size_t``. 
""" return int(self._kernel_ref) + cdef class SyclProgram: - """ Wraps a sycl::program object created from an OpenCL interoperability + """ Wraps a ``sycl::program`` object created from an OpenCL interoperability program. - SyclProgram exposes the C API from dpctl_sycl_program_interface.h. A + SyclProgram exposes the C API from ``dpctl_sycl_program_interface.h``. A SyclProgram can be created from either a source string or a SPIR-V binary file. """ @staticmethod - cdef SyclProgram _create (DPCTLSyclProgramRef pref): + cdef SyclProgram _create(DPCTLSyclProgramRef pref): cdef SyclProgram ret = SyclProgram.__new__(SyclProgram) ret._program_ref = pref return ret @@ -99,7 +114,7 @@ cdef class SyclProgram: def __dealloc__(self): DPCTLProgram_Delete(self._program_ref) - cdef DPCTLSyclProgramRef get_program_ref (self): + cdef DPCTLSyclProgramRef get_program_ref(self): return self._program_ref cpdef SyclKernel get_sycl_kernel(self, str kernel_name): @@ -116,18 +131,19 @@ cdef class SyclProgram: as a long. Returns: - The address of the DPCTLSyclProgramRef object used to create this - SyclProgram cast to a long. + The address of the ``DPCTLSyclProgramRef`` pointer used to create + this :class:`dpctl.SyclProgram` object cast to a ``size_t``. """ return int(self._program_ref) + cpdef create_program_from_source(SyclQueue q, unicode src, unicode copts=""): """ Creates a Sycl interoperability program from an OpenCL source string. - We use the DPCTLProgram_CreateFromOCLSource() C API function to create - a Sycl progrma from an OpenCL source program that can contain multiple - kernels. Note currently only supported for OpenCL. + We use the ``DPCTLProgram_CreateFromOCLSource()`` C API function to + create a ``sycl::program`` from an OpenCL source program that can + contain multiple kernels. Note currently only supported for OpenCL. 
Parameters: q (SyclQueue) : The :class:`SyclQueue` for which the @@ -137,7 +153,8 @@ cpdef create_program_from_source(SyclQueue q, unicode src, unicode copts=""): when compiling the program. Returns: - program (SyclProgram): A :class:`SyclProgram` object wrapping the sycl::program returned by the C API. + program (SyclProgram): A :class:`SyclProgram` object wrapping the + ``sycl::program`` returned by the C API. Raises: SyclProgramCompilationError: If a SYCL program could not be created. @@ -156,15 +173,14 @@ cpdef create_program_from_source(SyclQueue q, unicode src, unicode copts=""): return SyclProgram._create(Pref) -cimport cython.array cpdef create_program_from_spirv(SyclQueue q, const unsigned char[:] IL, unicode copts=""): """ Creates a Sycl interoperability program from an SPIR-V binary. - We use the DPCTLProgram_CreateFromOCLSpirv() C API function to create - a Sycl progrma from an compiled SPIR-V binary file. + We use the ``DPCTLProgram_CreateFromSpirv()`` C API function to + create a ``sycl::program`` object from a compiled SPIR-V binary file. Parameters: q (SyclQueue): The :class:`SyclQueue` for which the @@ -174,7 +190,8 @@ cpdef create_program_from_spirv(SyclQueue q, const unsigned char[:] IL, when compiling the program. Returns: - program (SyclProgram): A :class:`SyclProgram` object wrapping the sycl::program returned by the C API. + program (SyclProgram): A :class:`SyclProgram` object wrapping the + ``sycl::program`` returned by the C API. Raises: SyclProgramCompilationError: If a SYCL program could not be created. diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 4f74282ce9..a393af69fe 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -17,14 +17,13 @@ """ **Data Parallel Tensor Collection** - `dpctl.tensor` is an experimental collection of tensor implementations - that will implement future Python data API (https://data-apis.github.io/array-api/latest/). 
+ ``dpctl.tensor`` is an experimental collection of tensor implementations + that will implement future Python data API + (https://data-apis.github.io/array-api/latest/). Available tensor implementations: - * `numpy_usm_shared`: Provides a `numpy.ndarray` sub-class whose \ - underlying memory buffer is allocated with a USM shared memory allocator. + * ``numpy_usm_shared``: Provides a ``numpy.ndarray`` sub-class whose + underlying memory buffer is allocated with a USM shared memory allocator. """ - -import dpctl.tensor.numpy_usm_shared diff --git a/dpctl/tensor/numpy_usm_shared.py b/dpctl/tensor/numpy_usm_shared.py index e65a1a0849..b7e389509e 100644 --- a/dpctl/tensor/numpy_usm_shared.py +++ b/dpctl/tensor/numpy_usm_shared.py @@ -16,10 +16,10 @@ """Provides an implementation of a SYCL malloc_shared allocator-aware Numpy. -The modules includes a `numpy.ndarray` sub-class whose underlying memory buffer -is allocated with a SYCL malloc_shared memory allocator. The malloc_shared -allocated array is meant to be interoperable with Python extension modules that -use SYCL and support USM. +The module includes a ``numpy.ndarray`` sub-class whose underlying memory +buffer is allocated with a SYCL malloc_shared memory allocator. The +``malloc_shared`` allocated array is meant to be interoperable with Python +extension modules that use SYCL and support USM. The module also includes all NumPy classes and functions and the module can be used as a drop-in replacement for standard NumPy. 
Note that this module does @@ -28,14 +28,14 @@ """ -import numpy as np -from inspect import getmembers, isfunction, isclass, isbuiltin -from numbers import Number +import builtins import sys -import inspect -import dpctl +from inspect import getmembers, isbuiltin, isclass, isfunction +from numbers import Number + +import numpy as np + from dpctl.memory import MemoryUSMShared -import builtins debug = False @@ -83,7 +83,8 @@ def convert_ndarray_to_np_ndarray(x, require_ndarray=False): return np.array(x, copy=False, subok=False) elif isinstance(x, tuple): return tuple( - convert_ndarray_to_np_ndarray(y, require_ndarray=require_ndarray) for y in x + convert_ndarray_to_np_ndarray(y, require_ndarray=require_ndarray) + for y in x ) elif require_ndarray: raise TypeError @@ -103,7 +104,13 @@ def add_external_usm_checker(func): ndarray.external_usm_checkers.append(func) def __new__( - subtype, shape, dtype=float, buffer=None, offset=0, strides=None, order=None + subtype, + shape, + dtype=float, + buffer=None, + offset=0, + strides=None, + order=None, ): # Create a new array. if buffer is None: @@ -136,7 +143,10 @@ def __new__( return new_obj # zero copy if buffer is a usm backed array-like thing elif hasattr(buffer, array_interface_property): - dprint("numpy_usm_shared::ndarray __new__ buffer", array_interface_property) + dprint( + "numpy_usm_shared::ndarray __new__ buffer", + array_interface_property, + ) # also check for array interface new_obj = np.ndarray.__new__( subtype, @@ -158,7 +168,10 @@ def __new__( ) return new_obj else: - dprint("numpy_usm_shared::ndarray __new__ buffer not None and not sycl_usm") + dprint( + "numpy_usm_shared::ndarray __new__ buffer not None " + "and not sycl_usm" + ) nelems = np.prod(shape) # must copy ar = np.ndarray( @@ -236,8 +249,8 @@ def __array_finalize__(self, obj): # Just raise an exception since __array_ufunc__ makes all # reasonable cases not need the code below. 
raise ValueError( - "Non-USM allocated ndarray can not viewed as a USM-allocated \ - one without a copy" + "Non-USM allocated ndarray can not viewed as a USM-allocated " + "one without a copy." ) # Tell Numba to not treat this type just like a NumPy ndarray but to @@ -276,14 +289,15 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): else: return NotImplemented # Have to avoid recursive calls to array_ufunc here. - # If no out kwarg then we create a numpy_usm_shared out so that we get - # USM memory. However, if kwarg has numpy_usm_shared-typed out then - # array_ufunc is called recursively so we cast out as regular + # If no out kwarg then we create a numpy_usm_shared out so that we + # get USM memory. However, if kwarg has numpy_usm_shared-typed out + # then array_ufunc is called recursively so we cast out as regular # NumPy ndarray (having a USM data pointer). out_arg = kwargs.get("out", None) if out_arg is None: # maybe copy? - # deal with multiple returned arrays, so kwargs['out'] can be tuple + # deal with multiple returned arrays, so kwargs['out'] can be + # tuple. 
res_type = np.result_type(*typing) out_arg = empty(inputs[0].shape, dtype=res_type) out_as_np = convert_ndarray_to_np_ndarray(out_arg) @@ -335,9 +349,13 @@ def __array_function__(self, func, types, args, kwargs): if has_func: cm = sys.modules[__name__] affunc = getattr(cm, fname) - fargs = [x.view(np.ndarray) if isinstance(x, ndarray) else x for x in args] + fargs = [ + x.view(np.ndarray) if isinstance(x, ndarray) else x + for x in args + ] fkwargs = { - key: convert_ndarray_to_np_ndarray(val) for key, val in kwargs.items() + key: convert_ndarray_to_np_ndarray(val) + for key, val in kwargs.items() } res = affunc(*fargs, **fkwargs) return kwargs["out"] if "out" in kwargs else res diff --git a/dpctl/tests/__init__.py b/dpctl/tests/__init__.py index 2170cd60ba..2fc36c98cd 100644 --- a/dpctl/tests/__init__.py +++ b/dpctl/tests/__init__.py @@ -16,15 +16,3 @@ """Top-level module of all dpctl Python unit test cases. """ - -from .test_dparray import * -from .test_sycl_device import * -from .test_sycl_context import * -from .test_sycl_kernel_submit import * -from .test_sycl_platform import * -from .test_sycl_program import * -from .test_sycl_queue import * -from .test_sycl_queue_manager import * -from .test_sycl_queue_memcpy import * -from .test_sycl_usm import * -from .test_dparray import * diff --git a/dpctl/tests/test_dparray.py b/dpctl/tests/test_dparray.py index ad98b98633..881aa79f44 100644 --- a/dpctl/tests/test_dparray.py +++ b/dpctl/tests/test_dparray.py @@ -18,9 +18,11 @@ """ import unittest -from dpctl.tensor import numpy_usm_shared as dparray + import numpy +from dpctl.tensor import numpy_usm_shared as dparray + class Test_dparray(unittest.TestCase): def setUp(self): diff --git a/dpctl/tests/test_sycl_context.py b/dpctl/tests/test_sycl_context.py index 6cefd989e4..c6b6954daa 100644 --- a/dpctl/tests/test_sycl_context.py +++ b/dpctl/tests/test_sycl_context.py @@ -17,9 +17,9 @@ """ Defines unit test cases for the SyclContxt class. 
""" -import dpctl import pytest +import dpctl list_of_standard_selectors = [ dpctl.select_accelerator_device, @@ -52,6 +52,7 @@ "abc", ] + # Unit test cases that will be run for every device def check_get_max_compute_units(device): max_compute_units = device.max_compute_units @@ -293,8 +294,8 @@ def check(request): def test_standard_selectors(device_selector, check): - """Tests if the standard SYCL device_selectors are able to select a - device. + """ + Tests if the standard SYCL device_selectors are able to select a device. """ try: device = device_selector() @@ -309,12 +310,13 @@ def test_standard_selectors(device_selector, check): def test_current_device(check): - """Test is the device for the current queue is valid.""" + """ + Test is the device for the current queue is valid. + """ try: q = dpctl.get_current_queue() except Exception: pytest.fail("Encountered an exception inside get_current_queue().") - device = q.get_sycl_device() ctx = q.get_sycl_context() devs = ctx.get_devices() # add check that device is among devs @@ -322,7 +324,10 @@ def test_current_device(check): def test_valid_filter_selectors(valid_filter, check): - """Tests if we can create a SyclDevice using a supported filter selector string.""" + """ + Tests if we can create a SyclDevice using a supported filter selector + string. + """ device = None try: ctx = dpctl.SyclContext(valid_filter) @@ -333,11 +338,12 @@ def test_valid_filter_selectors(valid_filter, check): def test_invalid_filter_selectors(invalid_filter): - """An invalid filter string should always be caught and a SyclQueueCreationError - raised. + """ + An invalid filter string should always be caught and a + SyclQueueCreationError raised. 
""" with pytest.raises(ValueError): - q = dpctl.SyclContext(invalid_filter) + dpctl.SyclContext(invalid_filter) def test_context_not_equals(): @@ -369,7 +375,7 @@ def test_context_can_be_used_in_queue(valid_filter): devs = ctx.get_devices() assert len(devs) == ctx.device_count for d in devs: - q = dpctl.SyclQueue(ctx, d) + dpctl.SyclQueue(ctx, d) def test_context_can_be_used_in_queue2(valid_filter): @@ -381,7 +387,7 @@ def test_context_can_be_used_in_queue2(valid_filter): # skip test for devices rejected by default selector pytest.skip() ctx = dpctl.SyclContext(d) - q = dpctl.SyclQueue(ctx, d) + dpctl.SyclQueue(ctx, d) def test_context_multi_device(): diff --git a/dpctl/tests/test_sycl_device.py b/dpctl/tests/test_sycl_device.py index caf176d319..02c25fef39 100644 --- a/dpctl/tests/test_sycl_device.py +++ b/dpctl/tests/test_sycl_device.py @@ -17,8 +17,9 @@ """ Defines unit test cases for the SyclDevice class. """ -import dpctl import pytest + +import dpctl from dpctl._sycl_device import SubDeviceCreationError list_of_standard_selectors = [ @@ -52,6 +53,7 @@ "abc", ] + # Unit test cases that will be run for every device def check_get_max_compute_units(device): max_compute_units = device.max_compute_units @@ -351,7 +353,9 @@ def check_create_sub_devices_equally(device): n = int(device.max_compute_units / 2) device.create_sub_devices(partition=n) except SubDeviceCreationError: - pytest.skip("create_sub_devices can't create sub-devices on this device") + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) except Exception: pytest.fail("create_sub_devices failed") @@ -361,7 +365,9 @@ def check_create_sub_devices_by_counts(device): n = device.max_compute_units / 2 device.create_sub_devices(partition=(n, n)) except SubDeviceCreationError: - pytest.skip("create_sub_devices can't create sub-devices on this device") + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) except Exception: pytest.fail("create_sub_devices 
failed") @@ -370,7 +376,9 @@ def check_create_sub_devices_by_affinity_not_applicable(device): try: device.create_sub_devices(partition="not_applicable") except SubDeviceCreationError: - pytest.skip("create_sub_devices can't create sub-devices on this device") + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) except Exception: pytest.fail("create_sub_devices failed") @@ -379,7 +387,9 @@ def check_create_sub_devices_by_affinity_numa(device): try: device.create_sub_devices(partition="numa") except SubDeviceCreationError: - pytest.skip("create_sub_devices can't create sub-devices on this device") + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) except Exception: pytest.fail("create_sub_devices failed") @@ -388,7 +398,9 @@ def check_create_sub_devices_by_affinity_L4_cache(device): try: device.create_sub_devices(partition="L4_cache") except SubDeviceCreationError: - pytest.skip("create_sub_devices can't create sub-devices on this device") + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) except Exception: pytest.fail("create_sub_devices failed") @@ -397,7 +409,9 @@ def check_create_sub_devices_by_affinity_L3_cache(device): try: device.create_sub_devices(partition="L3_cache") except SubDeviceCreationError: - pytest.skip("create_sub_devices can't create sub-devices on this device") + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) except Exception: pytest.fail("create_sub_devices failed") @@ -406,7 +420,9 @@ def check_create_sub_devices_by_affinity_L2_cache(device): try: device.create_sub_devices(partition="L2_cache") except SubDeviceCreationError: - pytest.skip("create_sub_devices can't create sub-devices on this device") + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) except Exception: pytest.fail("create_sub_devices failed") @@ -415,7 +431,9 @@ def check_create_sub_devices_by_affinity_L1_cache(device): try: 
device.create_sub_devices(partition="L1_cache") except SubDeviceCreationError: - pytest.skip("create_sub_devices can't create sub-devices on this device") + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) except Exception: pytest.fail("create_sub_devices failed") @@ -424,7 +442,9 @@ def check_create_sub_devices_by_affinity_next_partitionable(device): try: device.create_sub_devices(partition="next_partitionable") except SubDeviceCreationError: - pytest.skip("create_sub_devices can't create sub-devices on this device") + pytest.skip( + "create_sub_devices can't create sub-devices on this device" + ) except Exception: pytest.fail("create_sub_devices failed") @@ -534,7 +554,10 @@ def test_current_device(check): def test_valid_filter_selectors(valid_filter, check): - """Tests if we can create a SyclDevice using a supported filter selector string.""" + """ + Tests if we can create a SyclDevice using a supported filter selector + string. + """ device = None try: device = dpctl.SyclDevice(valid_filter) @@ -544,8 +567,8 @@ def test_valid_filter_selectors(valid_filter, check): def test_invalid_filter_selectors(invalid_filter): - """An invalid filter string should always be caught and a ValueError - raised. + """ + An invalid filter string should always be caught and a ValueError raised. 
""" with pytest.raises(ValueError): - device = dpctl.SyclDevice(invalid_filter) + dpctl.SyclDevice(invalid_filter) diff --git a/dpctl/tests/test_sycl_device_factory.py b/dpctl/tests/test_sycl_device_factory.py index 0271f1d0fd..8b269767ff 100644 --- a/dpctl/tests/test_sycl_device_factory.py +++ b/dpctl/tests/test_sycl_device_factory.py @@ -17,10 +17,12 @@ """ Defines unit test cases for the _sycl_device_factory module """ -import dpctl -from dpctl import backend_type as bty, device_type as dty import pytest +import dpctl +from dpctl import backend_type as bty +from dpctl import device_type as dty + argument_list_1 = [ (bty.level_zero, dty.gpu), (bty.opencl, dty.gpu), diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index 0edae2cf94..ddcda2609f 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -18,12 +18,15 @@ """ import ctypes -import dpctl import unittest + +import numpy as np + +import dpctl import dpctl.memory as dpctl_mem import dpctl.program as dpctl_prog -import numpy as np -from ._helper import has_cpu, has_gpu + +from ._helper import has_gpu @unittest.skipUnless(has_gpu(), "No OpenCL GPU queues available") diff --git a/dpctl/tests/test_sycl_platform.py b/dpctl/tests/test_sycl_platform.py index e12de90268..4ade1e9ea5 100644 --- a/dpctl/tests/test_sycl_platform.py +++ b/dpctl/tests/test_sycl_platform.py @@ -17,8 +17,10 @@ """Defines unit test cases for the SyclPlatform class. """ -import dpctl import pytest + +import dpctl + from ._helper import has_sycl_platforms list_of_valid_filter_selectors = [ @@ -121,7 +123,7 @@ def test_invalid_platform_creation(invalid_filter, check): string. 
""" with pytest.raises(ValueError): - platform = dpctl.SyclPlatform(invalid_filter) + dpctl.SyclPlatform(invalid_filter) def test_lsplatform(): diff --git a/dpctl/tests/test_sycl_program.py b/dpctl/tests/test_sycl_program.py index 287dae2756..4718adf979 100644 --- a/dpctl/tests/test_sycl_program.py +++ b/dpctl/tests/test_sycl_program.py @@ -17,11 +17,13 @@ """Defines unit test cases for the SyclProgram and SyclKernel classes """ +import os +import unittest + import dpctl import dpctl.program as dpctl_prog -import unittest -import os -from ._helper import has_cpu, has_gpu + +from ._helper import has_gpu @unittest.skipUnless(has_gpu(), "No OpenCL GPU queues available") @@ -98,7 +100,7 @@ def test_create_program_from_spirv(self): spirv = fin.read() with dpctl.device_context("level_zero:gpu:0"): q = dpctl.get_current_queue() - prog = dpctl_prog.create_program_from_spirv(q, spirv) + dpctl_prog.create_program_from_spirv(q, spirv) @unittest.expectedFailure def test_create_program_from_source(self): @@ -113,7 +115,7 @@ def test_create_program_from_source(self): }" with dpctl.device_context("level_zero:gpu:0"): q = dpctl.get_current_queue() - prog = dpctl_prog.create_program_from_source(q, oclSrc) + dpctl_prog.create_program_from_source(q, oclSrc) if __name__ == "__main__": diff --git a/dpctl/tests/test_sycl_queue.py b/dpctl/tests/test_sycl_queue.py index 31683e22db..c9739f0cda 100644 --- a/dpctl/tests/test_sycl_queue.py +++ b/dpctl/tests/test_sycl_queue.py @@ -17,9 +17,10 @@ """ Defines unit test cases for the SyclQueue class. 
""" -import dpctl import pytest +import dpctl + list_of_standard_selectors = [ dpctl.select_accelerator_device, dpctl.select_cpu_device, @@ -51,6 +52,7 @@ "abc", ] + # Unit test cases that will be run for every device def check_get_max_compute_units(device): max_compute_units = device.max_compute_units @@ -292,8 +294,8 @@ def check(request): def test_standard_selectors(device_selector, check): - """Tests if the standard SYCL device_selectors are able to select a - device. + """ + Tests if the standard SYCL device_selectors are able to select a device. """ try: device = device_selector() @@ -306,7 +308,9 @@ def test_standard_selectors(device_selector, check): def test_current_device(check): - """Test is the device for the current queue is valid.""" + """ + Test is the device for the current queue is valid. + """ try: q = dpctl.get_current_queue() except Exception: @@ -316,7 +320,10 @@ def test_current_device(check): def test_valid_filter_selectors(valid_filter, check): - """Tests if we can create a SyclDevice using a supported filter selector string.""" + """ + Tests if we can create a SyclDevice using a supported filter selector + string. + """ device = None try: q = dpctl.SyclQueue(valid_filter) @@ -331,11 +338,12 @@ def test_valid_filter_selectors(valid_filter, check): def test_invalid_filter_selectors(invalid_filter): - """An invalid filter string should always be caught and a SyclQueueCreationError - raised. + """ + An invalid filter string should always be caught and a + SyclQueueCreationError raised. """ with pytest.raises(dpctl.SyclQueueCreationError): - q = dpctl.SyclQueue(invalid_filter) + dpctl.SyclQueue(invalid_filter) def test_context_not_equals(): diff --git a/dpctl/tests/test_sycl_queue_manager.py b/dpctl/tests/test_sycl_queue_manager.py index 6262605afe..bca73853df 100644 --- a/dpctl/tests/test_sycl_queue_manager.py +++ b/dpctl/tests/test_sycl_queue_manager.py @@ -17,8 +17,10 @@ """Defines unit test cases for the SyclQueueManager class. 
""" -import dpctl import unittest + +import dpctl + from ._helper import has_cpu, has_gpu, has_sycl_platforms @@ -52,7 +54,9 @@ def test_get_current_device_type_inside_device_ctxt(self): self.assertNotEqual(dpctl.get_current_device_type(), None) with dpctl.device_context("opencl:gpu:0"): - self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.gpu) + self.assertEqual( + dpctl.get_current_device_type(), dpctl.device_type.gpu + ) self.assertNotEqual(dpctl.get_current_device_type(), None) @@ -61,11 +65,17 @@ def test_get_current_device_type_inside_nested_device_ctxt(self): self.assertNotEqual(dpctl.get_current_device_type(), None) with dpctl.device_context("opencl:cpu:0"): - self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.cpu) + self.assertEqual( + dpctl.get_current_device_type(), dpctl.device_type.cpu + ) with dpctl.device_context("opencl:gpu:0"): - self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.gpu) - self.assertEqual(dpctl.get_current_device_type(), dpctl.device_type.cpu) + self.assertEqual( + dpctl.get_current_device_type(), dpctl.device_type.gpu + ) + self.assertEqual( + dpctl.get_current_device_type(), dpctl.device_type.cpu + ) self.assertNotEqual(dpctl.get_current_device_type(), None) diff --git a/dpctl/tests/test_sycl_queue_memcpy.py b/dpctl/tests/test_sycl_queue_memcpy.py index 15a943b665..ce68183156 100644 --- a/dpctl/tests/test_sycl_queue_memcpy.py +++ b/dpctl/tests/test_sycl_queue_memcpy.py @@ -17,9 +17,11 @@ """Defines unit test cases for the SyclQueue.memcpy. """ +import unittest + import dpctl import dpctl.memory -import unittest + from ._helper import has_sycl_platforms @@ -65,7 +67,9 @@ def test_memcpy_type_error(self): q.memcpy(mobj, None, 3) self.assertEqual(type(cm.exception), TypeError) - self.assertEqual(str(cm.exception), "Parameter `src` should have type _Memory.") + self.assertEqual( + str(cm.exception), "Parameter `src` should have type _Memory." 
+ ) if __name__ == "__main__": diff --git a/dpctl/tests/test_sycl_usm.py b/dpctl/tests/test_sycl_usm.py index e8ae3604cd..81de0fc9c7 100644 --- a/dpctl/tests/test_sycl_usm.py +++ b/dpctl/tests/test_sycl_usm.py @@ -18,15 +18,18 @@ """ import unittest -import dpctl -from dpctl.memory import MemoryUSMShared, MemoryUSMHost, MemoryUSMDevice + import numpy as np + +import dpctl +from dpctl.memory import MemoryUSMDevice, MemoryUSMHost, MemoryUSMShared + from ._helper import has_cpu, has_gpu, has_sycl_platforms class Dummy(MemoryUSMShared): """ - Class that exposes `__sycl_usm_array_interface__` with + Class that exposes ``__sycl_usm_array_interface__`` with SYCL context for sycl object, instead of Sycl queue. """ @@ -152,7 +155,9 @@ def test_pickling(self): mobj_reconstructed = pickle.loads(pickle.dumps(mobj)) self.assertEqual( - type(mobj), type(mobj_reconstructed), "Pickling should preserve type" + type(mobj), + type(mobj_reconstructed), + "Pickling should preserve type", ) self.assertEqual( mobj.tobytes(), @@ -261,7 +266,10 @@ def __sycl_usm_array_interface__(self): class TestMemoryWithView(unittest.TestCase): def test_suai_non_contig_1D(self): - """Test of zero-copy using sycl_usm_array_interface with non-contiguous data""" + """ + Test of zero-copy using sycl_usm_array_interface with non-contiguous + data. 
+ """ MemoryUSMClass = MemoryUSMShared try: @@ -276,7 +284,9 @@ def test_suai_non_contig_1D(self): v = View(buf, shape=(n1d,), strides=(step_1d,), offset=offset) buf2 = MemoryUSMClass(v) expected_nbytes = ( - np.flip(host_canary[offset : offset + n1d * step_1d : step_1d]).ctypes.data + np.flip( + host_canary[offset : offset + n1d * step_1d : step_1d] + ).ctypes.data + 1 - host_canary[offset:].ctypes.data ) diff --git a/examples/cython/sycl_buffer/_buffer_example.pyx b/examples/cython/sycl_buffer/_buffer_example.pyx index 5c3e2414e7..2737dbcc72 100644 --- a/examples/cython/sycl_buffer/_buffer_example.pyx +++ b/examples/cython/sycl_buffer/_buffer_example.pyx @@ -15,14 +15,22 @@ # limitations under the License. cimport numpy as cnp + import numpy as np cimport dpctl as c_dpctl + import dpctl + cdef extern from "use_sycl_buffer.h": - int c_columnwise_total(c_dpctl.DPCTLSyclQueueRef q, size_t n, size_t m, double *m, double *ct) nogil - int c_columnwise_total_no_mkl(c_dpctl.DPCTLSyclQueueRef q, size_t n, size_t m, double *m, double *ct) nogil + int c_columnwise_total( + c_dpctl.DPCTLSyclQueueRef q, size_t n, size_t m, double *m, double *ct + ) nogil + int c_columnwise_total_no_mkl( + c_dpctl.DPCTLSyclQueueRef q, size_t n, size_t m, double *m, double *ct + ) nogil + def columnwise_total(double[:, ::1] v, method='mkl', queue=None): cdef cnp.ndarray res_array = np.empty((v.shape[1],), dtype='d') @@ -41,9 +49,13 @@ def columnwise_total(double[:, ::1] v, method='mkl', queue=None): if method == 'mkl': with nogil: - ret_status = c_columnwise_total(q_ref, v.shape[0], v.shape[1], &v[0,0], &res_memslice[0]) + ret_status = c_columnwise_total( + q_ref, v.shape[0], v.shape[1], &v[0, 0], &res_memslice[0] + ) else: with nogil: - ret_status = c_columnwise_total_no_mkl(q_ref, v.shape[0], v.shape[1], &v[0,0], &res_memslice[0]) + ret_status = c_columnwise_total_no_mkl( + q_ref, v.shape[0], v.shape[1], &v[0, 0], &res_memslice[0] + ) return res_array diff --git 
a/examples/cython/sycl_buffer/bench.py b/examples/cython/sycl_buffer/bench.py index db5bda926e..9fc493fd18 100644 --- a/examples/cython/sycl_buffer/bench.py +++ b/examples/cython/sycl_buffer/bench.py @@ -14,9 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import dpctl -import syclbuffer as sb +import timeit + import numpy as np +import syclbuffer as sb + +import dpctl X = np.full((10 ** 4, 4098), 1e-4, dtype="d") @@ -40,14 +43,13 @@ ) ) -import timeit print("Times for 'opencl:cpu'") print( timeit.repeat( stmt="sb.columnwise_total(X, queue=q)", setup='q = dpctl.SyclQueue("opencl:cpu"); ' - "sb.columnwise_total(X, queue=q)", # ensure JIT compilation is not counted + "sb.columnwise_total(X, queue=q)", # do not count JIT compilation number=100, globals=globals(), ) @@ -57,7 +59,8 @@ print( timeit.repeat( stmt="sb.columnwise_total(X, queue=q)", - setup='q = dpctl.SyclQueue("opencl:gpu"); sb.columnwise_total(X, queue=q)', + setup='q = dpctl.SyclQueue("opencl:gpu"); ' + "sb.columnwise_total(X, queue=q)", number=100, globals=globals(), ) diff --git a/examples/cython/sycl_buffer/run.py b/examples/cython/sycl_buffer/run.py index f67eb15845..a82a6671a0 100644 --- a/examples/cython/sycl_buffer/run.py +++ b/examples/cython/sycl_buffer/run.py @@ -14,8 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -import syclbuffer as sb import numpy as np +import syclbuffer as sb + import dpctl X = np.random.randn(100, 4) diff --git a/examples/cython/sycl_buffer/setup.py b/examples/cython/sycl_buffer/setup.py index 9482316e44..188fdda605 100644 --- a/examples/cython/sycl_buffer/setup.py +++ b/examples/cython/sycl_buffer/setup.py @@ -14,24 +14,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys -from os.path import join, exists, abspath, dirname -from os import getcwd from os import environ +from os.path import dirname, join + from Cython.Build import cythonize def configuration(parent_package="", top_path=None): - from numpy.distutils.misc_util import Configuration - from numpy.distutils.system_info import get_info import numpy as np + from numpy.distutils.misc_util import Configuration + import dpctl config = Configuration("", parent_package, top_path) oneapi_root = environ.get("ONEAPI_ROOT", None) if not oneapi_root: - raise ValueError("ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi") + raise ValueError( + "ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi" + ) mkl_info = { "include_dirs": [join(oneapi_root, "mkl", "include")], @@ -65,7 +66,8 @@ def configuration(parent_package="", top_path=None): join(wdir, "use_sycl_buffer.cpp"), join(wdir, "use_sycl_buffer.h"), ], - include_dirs=[wdir, np.get_include(), dpctl.get_include()] + mkl_include_dirs, + include_dirs=[wdir, np.get_include(), dpctl.get_include()] + + mkl_include_dirs, libraries=["sycl"] + mkl_libraries, runtime_library_dirs=mkl_library_dirs, extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], @@ -73,7 +75,9 @@ def configuration(parent_package="", top_path=None): language="c++", ) - config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) + config.ext_modules = cythonize( + config.ext_modules, include_path=[pdir, wdir] + ) return config diff --git a/examples/cython/sycl_direct_linkage/_buffer_example.pyx b/examples/cython/sycl_direct_linkage/_buffer_example.pyx index 1d93eea7d6..49d6cbd95e 100644 --- a/examples/cython/sycl_direct_linkage/_buffer_example.pyx +++ b/examples/cython/sycl_direct_linkage/_buffer_example.pyx @@ -15,15 +15,22 @@ # limitations under the License. 
cimport numpy as cnp + import numpy as np + from cython.operator cimport dereference as deref + cdef extern from "CL/sycl.hpp" namespace "cl::sycl": cdef cppclass queue nogil: - pass + pass + cdef extern from "sycl_function.hpp": - int c_columnwise_total(queue& q, size_t n, size_t m, double *m, double *ct) nogil + int c_columnwise_total( + queue& q, size_t n, size_t m, double *m, double *ct + ) nogil + def columnwise_total(double[:, ::1] v): cdef cnp.ndarray res_array = np.empty((v.shape[1],), dtype='d') @@ -34,7 +41,9 @@ def columnwise_total(double[:, ::1] v): q = new queue() with nogil: - ret_status = c_columnwise_total(deref(q), v.shape[0], v.shape[1], &v[0,0], &res_memslice[0]) + ret_status = c_columnwise_total( + deref(q), v.shape[0], v.shape[1], &v[0, 0], &res_memslice[0] + ) del q diff --git a/examples/cython/sycl_direct_linkage/bench.py b/examples/cython/sycl_direct_linkage/bench.py index fc7d85d1da..fdff5589ce 100644 --- a/examples/cython/sycl_direct_linkage/bench.py +++ b/examples/cython/sycl_direct_linkage/bench.py @@ -14,30 +14,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import dpctl -import syclbuffer_naive as sb +import timeit + import numpy as np +import syclbuffer_naive as sb X = np.full((10 ** 4, 4098), 1e-4, dtype="d") # warm-up print("=" * 10 + " Executing warm-up " + "=" * 10) print("NumPy result: ", X.sum(axis=0)) - print( "SYCL(default_device) result: {}".format( sb.columnwise_total(X), ) ) - -import timeit - print( - "Running time of 100 calls to columnwise_total on matrix with shape {}".format( - X.shape - ) + "Running time of 100 calls to columnwise_total on matrix with " + "shape {}".format(X.shape) ) - print("Times for default_selector, inclusive of queue creation:") print( timeit.repeat( @@ -47,6 +42,5 @@ globals=globals(), ) ) - print("Times for NumPy") print(timeit.repeat(stmt="X.sum(axis=0)", number=100, globals=globals())) diff --git a/examples/cython/sycl_direct_linkage/run.py b/examples/cython/sycl_direct_linkage/run.py index 4ef1b640c5..33c7005186 100644 --- a/examples/cython/sycl_direct_linkage/run.py +++ b/examples/cython/sycl_direct_linkage/run.py @@ -14,8 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -import syclbuffer_naive as sb import numpy as np +import syclbuffer_naive as sb X = np.random.randn(20, 10) diff --git a/examples/cython/sycl_direct_linkage/setup.py b/examples/cython/sycl_direct_linkage/setup.py index 7cb25d1613..fe71d28a0a 100644 --- a/examples/cython/sycl_direct_linkage/setup.py +++ b/examples/cython/sycl_direct_linkage/setup.py @@ -14,24 +14,25 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import sys -from os.path import join, exists, abspath, dirname -from os import getcwd from os import environ +from os.path import dirname, join + from Cython.Build import cythonize def configuration(parent_package="", top_path=None): - from numpy.distutils.misc_util import Configuration - from numpy.distutils.system_info import get_info import numpy as np + from numpy.distutils.misc_util import Configuration + import dpctl config = Configuration("", parent_package, top_path) oneapi_root = environ.get("ONEAPI_ROOT", None) if not oneapi_root: - raise ValueError("ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi") + raise ValueError( + "ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi" + ) mkl_info = { "include_dirs": [join(oneapi_root, "mkl", "include")], @@ -65,7 +66,8 @@ def configuration(parent_package="", top_path=None): join(pdir, "sycl_function.cpp"), join(pdir, "sycl_function.hpp"), ], - include_dirs=[wdir, np.get_include(), dpctl.get_include()] + mkl_include_dirs, + include_dirs=[wdir, np.get_include(), dpctl.get_include()] + + mkl_include_dirs, libraries=["sycl"] + mkl_libraries, runtime_library_dirs=mkl_library_dirs, extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], @@ -73,7 +75,9 @@ def configuration(parent_package="", top_path=None): language="c++", ) - config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, wdir]) + config.ext_modules = cythonize( + config.ext_modules, include_path=[pdir, wdir] + ) return config diff --git a/examples/cython/usm_memory/blackscholes.pyx b/examples/cython/usm_memory/blackscholes.pyx index d0aa23599f..594308aaba 100644 --- a/examples/cython/usm_memory/blackscholes.pyx +++ b/examples/cython/usm_memory/blackscholes.pyx @@ -17,17 +17,37 @@ # cython: language_level=3 # distutils: language=c++ -cimport dpctl as c_dpctl -cimport dpctl.memory as c_dpctl_mem cimport numpy as cnp from cython cimport floating -import dpctl +cimport dpctl as c_dpctl +cimport dpctl.memory as c_dpctl_mem + import 
numpy as np +import dpctl + + cdef extern from "sycl_blackscholes.hpp": - cdef void cpp_blackscholes[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T* callput) except + - cdef void cpp_populate_params[T](c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T pl, T ph, T sl, T sh, T tl, T th, T rl, T rh, T vl, T vh, int seed) except + + cdef void cpp_blackscholes[T]( + c_dpctl.DPCTLSyclQueueRef, size_t n_opts, T* option_params, T* callput + ) except + + cdef void cpp_populate_params[T]( + c_dpctl.DPCTLSyclQueueRef, + size_t n_opts, + T* option_params, + T pl, + T ph, + T sl, + T sh, + T tl, + T th, + T rl, + T rh, + T vl, + T vh, + int seed + ) except + cdef c_dpctl.SyclQueue from_queue_keyword(queue): if (queue is None): @@ -56,9 +76,11 @@ def black_scholes_price(floating[:, ::1] option_params, queue=None): if (n_params != 5): raise ValueError(( - "Array of option parameters has unexpected number of columns {} != 5. " - "Each row must specify (current_price, strike_price, maturity, interest_rate, volatility)." - ).format(n_params)) + "Array of option parameters has unexpected number of " + "columns {} != 5. Each row must specify (current_price, " + "strike_price, maturity, interest_rate, volatility)." 
+ ).format(n_params) + ) q = from_queue_keyword(queue) q_ptr = q.get_queue_ref() @@ -67,21 +89,36 @@ def black_scholes_price(floating[:, ::1] option_params, queue=None): mobj = c_dpctl_mem.MemoryUSMShared(n_bytes, queue=q) callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='d') call_put_prices = callput_arr - dp1 = &option_params[0,0] - dp2 = &call_put_prices[0,0]; + dp1 = &option_params[0, 0] + dp2 = &call_put_prices[0, 0] cpp_blackscholes[double](q_ptr, n_opts, dp1, dp2) elif (floating is float): n_bytes = 2*n_opts * sizeof(float) mobj = c_dpctl_mem.MemoryUSMShared(n_bytes, queue=q) callput_arr = np.ndarray((n_opts, 2), buffer=mobj, dtype='f') call_put_prices = callput_arr - fp1 = &option_params[0,0] - fp2 = &call_put_prices[0,0] + fp1 = &option_params[0, 0] + fp2 = &call_put_prices[0, 0] cpp_blackscholes[float](q_ptr, n_opts, fp1, fp2) return callput_arr -def populate_params(floating[:, ::1] option_params, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, int seed, queue=None): + +def populate_params( + floating[:, ::1] option_params, + pl, + ph, + sl, + sh, + tl, + th, + rl, + rh, + vl, + vh, + int seed, + queue=None +): cdef size_t n_opts = option_params.shape[0] cdef size_t n_params = option_params.shape[1] @@ -91,16 +128,23 @@ def populate_params(floating[:, ::1] option_params, pl, ph, sl, sh, tl, th, rl, cdef float* fp = NULL if (n_params != 5): - raise ValueError(( - "Array of option parameters has unexpected number of columns {} != 5. " - "Each row must specify (current_price, strike_price, maturity, interest_rate, volatility)." - ).format(n_params)) + raise ValueError( + "Array of option parameters has unexpected number of " + "columns {} != 5. 
Each row must specify (current_price, " + "strike_price, maturity, interest_rate, volatility).".format( + n_params + ) + ) q = from_queue_keyword(queue) q_ptr = q.get_queue_ref() if (floating is double): - dp = &option_params[0,0] - cpp_populate_params[double](q_ptr, n_opts, dp, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed) + dp = &option_params[0, 0] + cpp_populate_params[double]( + q_ptr, n_opts, dp, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed + ) elif (floating is float): - fp = &option_params[0,0] - cpp_populate_params[float](q_ptr, n_opts, fp, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed) + fp = &option_params[0, 0] + cpp_populate_params[float]( + q_ptr, n_opts, fp, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, seed + ) diff --git a/examples/cython/usm_memory/run.py b/examples/cython/usm_memory/run.py index 00b4ce8320..2b676288dd 100644 --- a/examples/cython/usm_memory/run.py +++ b/examples/cython/usm_memory/run.py @@ -15,11 +15,16 @@ # limitations under the License. # coding: utf-8 -import dpctl.memory as dpctl_mem + +import timeit + import blackscholes_usm as bs -import numpy as np, dpctl +import numpy as np from reference_black_scholes import ref_python_black_scholes +import dpctl +import dpctl.memory as dpctl_mem + def gen_option_params( n_opts, pl, ph, sl, sh, tl, th, rl, rh, vl, vh, dtype, queue=None @@ -52,22 +57,45 @@ def gen_option_params( X_ref = np.array([ref_python_black_scholes(*opt) for opt in opts], dtype="d") print( - "Correctness check: allclose(Xgpu, Xref) == ", np.allclose(Xgpu, X_ref, atol=1e-5) + "Correctness check: allclose(Xgpu, Xref) == ", + np.allclose(Xgpu, X_ref, atol=1e-5), ) n_opts = 3 * 10 ** 6 # compute on CPU sycl device -import timeit - cpu_q = dpctl.SyclQueue("opencl:cpu:0") opts1 = gen_option_params( - n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d", queue=cpu_q + n_opts, + 20.0, + 30.0, + 22.0, + 29.0, + 18.0, + 24.0, + 0.01, + 0.05, + 0.01, + 0.05, + "d", + queue=cpu_q, ) gpu_q = 
dpctl.SyclQueue("level_zero:gpu:0") opts2 = gen_option_params( - n_opts, 20.0, 30.0, 22.0, 29.0, 18.0, 24.0, 0.01, 0.05, 0.01, 0.05, "d", queue=gpu_q + n_opts, + 20.0, + 30.0, + 22.0, + 29.0, + 18.0, + 24.0, + 0.01, + 0.05, + 0.01, + 0.05, + "d", + queue=gpu_q, ) cpu_times = [] diff --git a/examples/cython/usm_memory/setup.py b/examples/cython/usm_memory/setup.py index 9bb1d93327..a2a507f25b 100644 --- a/examples/cython/usm_memory/setup.py +++ b/examples/cython/usm_memory/setup.py @@ -14,24 +14,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys -from os.path import join, exists, abspath, dirname -from os import getcwd from os import environ +from os.path import dirname, join + from Cython.Build import cythonize def configuration(parent_package="", top_path=None): - from numpy.distutils.misc_util import Configuration - from numpy.distutils.system_info import get_info import numpy as np + from numpy.distutils.misc_util import Configuration + import dpctl config = Configuration("", parent_package, top_path) oneapi_root = environ.get("ONEAPI_ROOT", None) if not oneapi_root: - raise ValueError("ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi") + raise ValueError( + "ONEAPI_ROOT must be set, typical value is /opt/intel/oneapi" + ) mkl_info = { "include_dirs": [join(oneapi_root, "mkl", "include")], @@ -65,7 +66,8 @@ def configuration(parent_package="", top_path=None): join(wdir, "sycl_blackscholes.cpp"), join(wdir, "sycl_blackscholes.hpp"), ], - include_dirs=[wdir, np.get_include(), dpctl.get_include()] + mkl_include_dirs, + include_dirs=[wdir, np.get_include(), dpctl.get_include()] + + mkl_include_dirs, libraries=["sycl"] + mkl_libraries, runtime_library_dirs=mkl_library_dirs, extra_compile_args=eca, # + ['-O0', '-g', '-ggdb'], @@ -73,7 +75,9 @@ def configuration(parent_package="", top_path=None): language="c++", ) - config.ext_modules = cythonize(config.ext_modules, include_path=[pdir, 
wdir]) + config.ext_modules = cythonize( + config.ext_modules, include_path=[pdir, wdir] + ) return config diff --git a/examples/python/_runner.py b/examples/python/_runner.py index 4a3377ee7a..b26865ec5e 100644 --- a/examples/python/_runner.py +++ b/examples/python/_runner.py @@ -36,7 +36,10 @@ def run_examples(example_description, glbls_dict): help="Functions to execute. Use --run all to run all of them.", ) parser.add_argument( - "-l", "--list", action="store_true", help="List available function names to run" + "-l", + "--list", + action="store_true", + help="List available function names to run", ) parser.add_argument( "-q", "--quiet", action="store_true", help="Do not echo example name." @@ -71,7 +74,10 @@ def run_examples(example_description, glbls_dict): print("") if has_nondefault_params(sgn): if not args.quiet: - print(f"INFO: Skip exectution of {fn} as it requires arguments") + print( + f"INFO: Skip exectution of {fn} as it " + "requires arguments" + ) else: if not args.quiet: print(f"INFO: Executing example {fn}") diff --git a/examples/python/subdevices.py b/examples/python/subdevices.py index 887522928b..032a7789b4 100644 --- a/examples/python/subdevices.py +++ b/examples/python/subdevices.py @@ -109,4 +109,6 @@ def create_subdevice_queue(): if __name__ == "__main__": import _runner as runner - runner.run_examples("Examples for working with subdevices in dpctl.", globals()) + runner.run_examples( + "Examples for working with subdevices in dpctl.", globals() + ) diff --git a/examples/python/usm_memory_allocation.py b/examples/python/usm_memory_allocation.py index ad1b701890..5a54d117d4 100644 --- a/examples/python/usm_memory_allocation.py +++ b/examples/python/usm_memory_allocation.py @@ -14,10 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Demonstrates SYCL USM memory usage in Python using dpctl.memory. +""" +Demonstrates SYCL USM memory usage in Python using dpctl.memory. 
""" -import dpctl import dpctl.memory as dpmem # allocate USM-shared byte-buffer diff --git a/examples/python/usm_memory_host_access.py b/examples/python/usm_memory_host_access.py index 482f0eb70c..984b4273b5 100644 --- a/examples/python/usm_memory_host_access.py +++ b/examples/python/usm_memory_host_access.py @@ -14,11 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Demonstrates how USM allocated memory can be accessed from the host in a +""" +Demonstrates how USM allocated memory can be accessed from the host in a Python program. """ -import dpctl import dpctl.memory as dpmem # USM-shared and USM-host pointers are host-accessible, @@ -53,6 +53,7 @@ except Exception as e: print("") print( - "An expected exception was raised during attempted construction of memoryview from USM-device memory object." + "An expected exception was raised during attempted construction of " + "memoryview from USM-device memory object." ) print("\t", e) diff --git a/examples/python/usm_memory_operation.py b/examples/python/usm_memory_operation.py index d65fad608c..0afc1de640 100644 --- a/examples/python/usm_memory_operation.py +++ b/examples/python/usm_memory_operation.py @@ -14,13 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Demonstrates host to device copy functions using dpctl.memory. +""" +Demonstrates host to device copy functions using dpctl.memory. """ -import dpctl -import dpctl.memory as dpmem import numpy as np +import dpctl.memory as dpmem + ms = dpmem.MemoryUSMShared(32) md = dpmem.MemoryUSMDevice(32) @@ -29,7 +30,8 @@ # copy host byte-like object to USM-device buffer md.copy_from_host(host_buf) -# copy USM-device buffer to USM-shared buffer in parallel (using sycl::queue::memcpy) +# copy USM-device buffer to USM-shared buffer in parallel using +# sycl::queue::memcpy. 
ms.copy_from_device(md) # build numpy array reusing host-accessible USM-shared memory diff --git a/scripts/build_backend.py b/scripts/build_backend.py index 7d1c2483d0..152448fc7b 100644 --- a/scripts/build_backend.py +++ b/scripts/build_backend.py @@ -18,11 +18,11 @@ """ +import glob import os -import sys -import subprocess import shutil -import glob +import subprocess +import sys IS_WIN = False IS_LIN = False @@ -38,9 +38,9 @@ CODE_COVERAGE = os.environ.get("CODE_COVERAGE") if IS_LIN: - DPCPP_ROOT = os.path.join(ONEAPI_ROOT, "compiler/latest/linux") + DPCPP_ROOT = os.path.join(ONEAPI_ROOT, r"compiler/latest/linux") if IS_WIN: - DPCPP_ROOT = os.path.join(ONEAPI_ROOT, "compiler\latest\windows") + DPCPP_ROOT = os.path.join(ONEAPI_ROOT, r"compiler\latest\windows") dpctl_dir = os.getcwd() build_cmake_dir = os.path.join(dpctl_dir, "build_cmake") @@ -63,8 +63,10 @@ "-DCMAKE_INSTALL_PREFIX=" + INSTALL_PREFIX, "-DCMAKE_PREFIX_PATH=" + INSTALL_PREFIX, "-DDPCPP_INSTALL_DIR=" + DPCPP_ROOT, - "-DCMAKE_C_COMPILER:PATH=" + os.path.join(DPCPP_ROOT, "bin", "clang"), - "-DCMAKE_CXX_COMPILER:PATH=" + os.path.join(DPCPP_ROOT, "bin", "dpcpp"), + "-DCMAKE_C_COMPILER:PATH=" + + os.path.join(DPCPP_ROOT, "bin", "clang"), + "-DCMAKE_CXX_COMPILER:PATH=" + + os.path.join(DPCPP_ROOT, "bin", "dpcpp"), "-DDPCTL_ENABLE_LO_PROGRAM_CREATION=ON", "-DDPCTL_BUILD_CAPI_TESTS=ON", "-DDPCTL_GENERATE_COVERAGE=ON", @@ -83,8 +85,10 @@ "-DCMAKE_INSTALL_PREFIX=" + INSTALL_PREFIX, "-DCMAKE_PREFIX_PATH=" + INSTALL_PREFIX, "-DDPCPP_INSTALL_DIR=" + DPCPP_ROOT, - "-DCMAKE_C_COMPILER:PATH=" + os.path.join(DPCPP_ROOT, "bin", "clang"), - "-DCMAKE_CXX_COMPILER:PATH=" + os.path.join(DPCPP_ROOT, "bin", "dpcpp"), + "-DCMAKE_C_COMPILER:PATH=" + + os.path.join(DPCPP_ROOT, "bin", "clang"), + "-DCMAKE_CXX_COMPILER:PATH=" + + os.path.join(DPCPP_ROOT, "bin", "dpcpp"), "-DDPCTL_ENABLE_LO_PROGRAM_CREATION=ON", backends, ] @@ -105,8 +109,10 @@ "-DCMAKE_INSTALL_PREFIX=" + INSTALL_PREFIX, "-DCMAKE_PREFIX_PATH=" + INSTALL_PREFIX, 
"-DDPCPP_INSTALL_DIR=" + DPCPP_ROOT, - "-DCMAKE_C_COMPILER:PATH=" + os.path.join(DPCPP_ROOT, "bin", "clang-cl.exe"), - "-DCMAKE_CXX_COMPILER:PATH=" + os.path.join(DPCPP_ROOT, "bin", "dpcpp.exe"), + "-DCMAKE_C_COMPILER:PATH=" + + os.path.join(DPCPP_ROOT, "bin", "clang-cl.exe"), + "-DCMAKE_CXX_COMPILER:PATH=" + + os.path.join(DPCPP_ROOT, "bin", "dpcpp.exe"), backends, ] subprocess.check_call(cmake_args, stderr=subprocess.STDOUT, shell=False) diff --git a/setup.py b/setup.py index ff5d3ceeb7..9d9a94fcdc 100644 --- a/setup.py +++ b/setup.py @@ -16,17 +16,16 @@ import os import os.path -import sys -import versioneer import subprocess +import sys -import setuptools.command.install as orig_install +import numpy as np import setuptools.command.develop as orig_develop - -from setuptools import setup, Extension, find_packages +import setuptools.command.install as orig_install from Cython.Build import cythonize +from setuptools import Extension, find_packages, setup -import numpy as np +import versioneer IS_WIN = False IS_MAC = False @@ -42,18 +41,18 @@ assert False, sys.platform + " not supported" if IS_LIN: - DPCPP_ROOT = os.environ["ONEAPI_ROOT"] + "/compiler/latest/linux" + DPCPP_ROOT = os.environ["ONEAPI_ROOT"] + r"/compiler/latest/linux" os.environ["DPCTL_SYCL_INTERFACE_LIBDIR"] = "dpctl" - os.environ["DPCTL_SYCL_INTERFACE_INCLDIR"] = "dpctl/include" + os.environ["DPCTL_SYCL_INTERFACE_INCLDIR"] = r"dpctl/include" os.environ["CFLAGS"] = "-fPIC" elif IS_WIN: os.environ["DPCTL_SYCL_INTERFACE_LIBDIR"] = "dpctl" - os.environ["DPCTL_SYCL_INTERFACE_INCLDIR"] = "dpctl\include" + os.environ["DPCTL_SYCL_INTERFACE_INCLDIR"] = r"dpctl\include" dpctl_sycl_interface_lib = os.environ["DPCTL_SYCL_INTERFACE_LIBDIR"] dpctl_sycl_interface_include = os.environ["DPCTL_SYCL_INTERFACE_INCLDIR"] -sycl_lib = os.environ["ONEAPI_ROOT"] + "\compiler\latest\windows\lib" +sycl_lib = os.environ["ONEAPI_ROOT"] + r"\compiler\latest\windows\lib" # Get long description with open("README.md", "r", 
encoding="utf-8") as file: @@ -85,7 +84,7 @@ def get_sdl_ldflags(): if IS_LIN: ldflags = ["-Wl,-z,noexecstack,-z,relro,-z,now"] elif IS_WIN: - ldflags = ["/NXCompat", "/DynamicBase"] + ldflags = [r"/NXCompat", r"/DynamicBase"] # Add ldflags from environment ldflags += remove_empty(os.getenv("LDFLAGS", "").split(" ")) @@ -100,7 +99,7 @@ def get_other_cxxflags(): elif IS_WIN: # FIXME: These are specific to MSVC and we should first make sure # what compiler we are using. - return ["/Ox", "/std:c++17"] + return [r"/Ox", r"/std:c++17"] def get_suppressed_warning_flags():