diff --git a/.gitignore b/.gitignore index 451461d..94ff03d 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ imgui.ini *.cpp *.c .* +include/ diff --git a/README.md b/README.md index a86c101..b5ea39f 100644 --- a/README.md +++ b/README.md @@ -2,39 +2,58 @@ Python wrapper for the OptiX 7 raytracing engine. -Python-OptiX wraps the OptiX C++ API using Cython and provides a simplified -interface to the original C-like API using mainly the -[CuPy](https://cupy.dev) package. +Python-OptiX wraps the original OptiX C-like API using Cython while aiming to provide a more +pythonic, object-oriented interface using the [CuPy](https://cupy.dev) package. ### Supported Platforms -Only Linux is supported at the moment. +Only Linux is officially supported at the moment. Experimental windows support is available. ### OptiX Versions -Python-OptiX currently supports the OptiX releases 7.3.0 and 7.4.0 +Python-OptiX always supports the most recent version of the OptiX SDK. +The current version therefore supports OptiX 7.6.0 ## Installation ### Dependencies Install a recent version of the [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads) -and the [OptiX 7.4.0 SDK](https://developer.nvidia.com/optix/downloads/7.4.0/linux64-x86_64) +and the [OptiX 7.6.0 SDK](https://developer.nvidia.com/optix/downloads/7.6.0/linux64-x86_64) -Note: The older [OptiX 7.3.0 SDK](https://developer.nvidia.com/optix/downloads/7.4.0/linux64-x86_64) version is supported as well. +Make sure the CUDA header files are installed as well. -Make sure the CUDA header files are installed as well. +Note, that for some variants of the CUDA Toolkit, +like the one installed by the `conda` package manager, these are not installed by default. +`conda`-environments require the additional `cudatoolkit-dev` package. -Add the locations of CUDA and OptiX to the system `PATH` variable if necessary. +### Environment + +`python-optix` requires both the OptiX as well as the CUDA include path during setup as well as runtime +to compile the CUDA kernels. Therefore, it is necessary to either add both locations to the system `PATH` +or set the `CUDA_PATH` and `OPTIX_PATH` variables to the respective locations. + +The setup additionally has the option to embed the OptiX header files into the `python-optix` installation. +If the variable `OPTIX_EMBED_HEADERS` is set to `1`, the setup will copy the headers from the +OptiX SDK directory into the generated wheel. + +If this option was chosen during setup, setting the `OPTIX_PATH` is no longer required as the +embedded headers will be utilized then. ### Using pip ``` -pip install python-optix +export OPTIX_PATH=/path/to/optix +export CUDA_PATH=/path/to/cuda_toolkit +export OPTIX_EMBED_HEADERS=1 # embed the optix headers into the package +python -m pip install python-optix ``` ### From source ``` git clone https://github.com/mortacious/python-optix.git cd python-optix -python setup.py install +export OPTIX_PATH=/path/to/optix +export CUDA_PATH=/path/to/cuda_toolkit +export OPTIX_EMBED_HEADERS=1 # embed the optix headers into the package +python -m pip install [-e] . ``` diff --git a/examples/compile_with_tasks.py b/examples/compile_with_tasks.py index 9e967ff..f3b2a35 100644 --- a/examples/compile_with_tasks.py +++ b/examples/compile_with_tasks.py @@ -72,4 +72,4 @@ tic = time.time() for i in range(args.num_iters): module = ox.Module(ctx, ptx, module_compile_options=compile_opts, pipeline_compile_options=pipeline_options) - print("Overall run time without tasks", time.time()-tic) \ No newline at end of file + print("Overall run time without tasks", time.time()-tic) diff --git a/examples/cuda/helpers.h b/examples/cuda/helpers.h index 046cb47..af0995c 100644 --- a/examples/cuda/helpers.h +++ b/examples/cuda/helpers.h @@ -42,11 +42,6 @@ __forceinline__ __device__ float3 toSRGB( const float3& c ) c.z < 0.0031308f ? 12.92f * c.z : 1.055f * powed.z - 0.055f ); } -//__forceinline__ __device__ float dequantizeUnsigned8Bits( const unsigned char i ) -//{ -// enum { N = (1 << 8) - 1 }; -// return min((float)i / (float)N), 1.f) -//} __forceinline__ __device__ unsigned char quantizeUnsigned8Bits( float x ) { x = clamp( x, 0.0f, 1.0f ); @@ -60,6 +55,7 @@ __forceinline__ __device__ uchar4 make_color( const float3& c ) float3 srgb = toSRGB( clamp( c, 0.0f, 1.0f ) ); return make_uchar4( quantizeUnsigned8Bits( srgb.x ), quantizeUnsigned8Bits( srgb.y ), quantizeUnsigned8Bits( srgb.z ), 255u ); } + __forceinline__ __device__ uchar4 make_color( const float4& c ) { return make_color( make_float3( c.x, c.y, c.z ) ); diff --git a/examples/cuda/opacity_micromap.cu b/examples/cuda/opacity_micromap.cu new file mode 100644 index 0000000..d523bfe --- /dev/null +++ b/examples/cuda/opacity_micromap.cu @@ -0,0 +1,143 @@ +// +// Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include + +#include "opacity_micromap.h" +#include "helpers.h" + +#include "vec_math.h" + +extern "C" { +__constant__ Params params; +} + + +static __forceinline__ __device__ void setPayloadColor( float3 p ) +{ + optixSetPayload_0( __float_as_uint( p.x ) ); + optixSetPayload_1( __float_as_uint( p.y ) ); + optixSetPayload_2( __float_as_uint( p.z ) ); +} + +static __forceinline__ __device__ void setPayloadAnyhit( unsigned int a ) +{ + optixSetPayload_3( a ); +} + + +static __forceinline__ __device__ void computeRay( uint3 idx, uint3 dim, float3& origin, float3& direction ) +{ + const float3 U = params.cam_u; + const float3 V = params.cam_v; + const float3 W = params.cam_w; + const float2 d = 2.0f * make_float2( + static_cast( idx.x ) / static_cast( dim.x ), + static_cast( idx.y ) / static_cast( dim.y ) + ) - 1.0f; + + origin = params.cam_eye; + direction = normalize( d.x * U + d.y * V + W ); +} + + +extern "C" __global__ void __raygen__rg() +{ + // Lookup our location within the launch grid + const uint3 idx = optixGetLaunchIndex(); + const uint3 dim = optixGetLaunchDimensions(); + + // Map our launch idx to a screen location and create a ray from the camera + // location through the screen + float3 ray_origin, ray_direction; + computeRay( idx, dim, ray_origin, ray_direction ); + + // Trace the ray against our scene hierarchy + unsigned int p0, p1, p2, p3=0; + optixTrace( + params.handle, + ray_origin, + ray_direction, + 0.0f, // Min intersection distance + 1e16f, // Max intersection distance + 0.0f, // rayTime -- used for motion blur + OptixVisibilityMask( 255 ), // Specify always visible + OPTIX_RAY_FLAG_NONE, + 0, // SBT offset -- See SBT discussion + 1, // SBT stride -- See SBT discussion + 0, // missSBTIndex -- See SBT discussion + p0, p1, p2, p3 ); + float3 result; + result.x = __uint_as_float( p0 ); + result.y = __uint_as_float( p1 ); + result.z = __uint_as_float( p2 ); + unsigned int anyhit_executed = p3; + + // If anyhit was executed, tint the pixel towards white + if( anyhit_executed ) + result = lerp( result, make_float3( 1.0f), 0.075f ); + + // Record results in our output raster + params.image[idx.y * params.image_width + idx.x] = make_color( result ); +} + + +extern "C" __global__ void __miss__ms() +{ + MissData* miss_data = reinterpret_cast( optixGetSbtDataPointer() ); + setPayloadColor( miss_data->bg_color ); +} + + +extern "C" __global__ void __closesthit__ch() +{ + // When built-in triangle intersection is used, a number of fundamental + // attributes are provided by the OptiX API, including barycentric coordinates. + const float2 barycentrics = optixGetTriangleBarycentrics(); + + setPayloadColor( make_float3( barycentrics*0.5f, 0.5f ) ); +} + + +extern "C" __global__ void __anyhit__opacity() +{ + setPayloadAnyhit( 1u ); // Register that anyhit was invoked + + const HitGroupData* rt_data = reinterpret_cast( optixGetSbtDataPointer() ); + const float2 barycentrics = optixGetTriangleBarycentrics(); + const int prim_idx = optixGetPrimitiveIndex(); + + const float2 uv0 = rt_data->uvs[ prim_idx*3 + 0 ]; + const float2 uv1 = rt_data->uvs[ prim_idx*3 + 1 ]; + const float2 uv2 = rt_data->uvs[ prim_idx*3 + 2 ]; + //printf("AH: uv0: (%f, %f) uv1: (%f, %f) uv2: (%f, %f)\n", uv0.x, uv0.y, uv1.x, uv1.y, uv2.x, uv2.y); + const float2 uv = computeUV( barycentrics, uv0, uv1, uv2 ); + + if( inCircle( uv ) ) + optixIgnoreIntersection(); +} diff --git a/examples/cuda/opacity_micromap.h b/examples/cuda/opacity_micromap.h new file mode 100644 index 0000000..290048e --- /dev/null +++ b/examples/cuda/opacity_micromap.h @@ -0,0 +1,82 @@ +// +// Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// + +#include "vec_math.h" + +constexpr float CIRCLE_RADIUS = 0.75f; + +//----------------------------------------------------------------------------- +// +// Helper functions to be used when pre-baking opacity into OMM and when +// evaluating opacity within anyhit function +// +//----------------------------------------------------------------------------- + +static __host__ __device__ __inline__ float2 computeUV( float2 bary, float2 uv0, float2 uv1, float2 uv2 ) +{ + return ( 1.0f - bary.x - bary.y )*uv0 + bary.x*uv1 + bary.y*uv2; +} + +static __host__ __device__ __inline__ bool inCircle( const float2 uv ) +{ + return ( uv.x * uv.x + uv.y * uv.y ) < ( CIRCLE_RADIUS * CIRCLE_RADIUS ); +}; + + +//----------------------------------------------------------------------------- +// +// Types +// +//----------------------------------------------------------------------------- +struct Params +{ + uchar4* image; + unsigned int image_width; + unsigned int image_height; + float3 cam_eye; + float3 cam_u, cam_v, cam_w; + OptixTraversableHandle handle; +}; + + +struct RayGenData +{ + // No data needed +}; + + +struct MissData +{ + float3 bg_color; +}; + + +struct HitGroupData +{ + float2* uvs; +}; diff --git a/examples/denolser.py b/examples/denoiser.py similarity index 96% rename from examples/denolser.py rename to examples/denoiser.py index 6e3d30f..2fcf1f0 100644 --- a/examples/denolser.py +++ b/examples/denoiser.py @@ -59,10 +59,10 @@ ret = cp.asnumpy(ret) fig, axs = plt.subplots(nrows=2, sharex=True, sharey=True) - axs[0].imshow(np.clip(color_image, 0, 255).astype(np.uint8)) + axs[0].imshow(np.clip(color_image, 0, 1).astype(np.float32)) axs[0].set_title("original") - ret = np.clip(ret, 0, 255).astype(np.uint8) + ret = np.clip(ret, 0, 1).astype(np.float32) axs[1].imshow(ret) axs[1].set_title("denoised") plt.show() \ No newline at end of file diff --git a/examples/dynamic_geometry.py b/examples/dynamic_geometry.py index 49f94df..f9344ae 100644 --- a/examples/dynamic_geometry.py +++ b/examples/dynamic_geometry.py @@ -18,6 +18,16 @@ DEBUG=False +if DEBUG: + exception_flags=ox.ExceptionFlags.DEBUG | ox.ExceptionFlags.TRACE_DEPTH | ox.ExceptionFlags.STACK_OVERFLOW, + debug_level = ox.CompileDebugLevel.FULL + opt_level = ox.CompileOptimizationLevel.LEVEL_0 +else: + exception_flags=ox.ExceptionFlags.NONE + debug_level = ox.CompileDebugLevel.MINIMAL + opt_level = ox.CompileOptimizationLevel.LEVEL_3 + + #------------------------------------------------------------------------------ # Local types #------------------------------------------------------------------------------ @@ -41,7 +51,9 @@ def __init__(self): def __getattribute__(self, name): if name in Params._params.keys(): - return self.__dict__['handle'][name] + item = self.__dict__['handle'][name] + if isinstance(item, np.ndarray) and item.shape in ((0,), (1,)): + return item.item() else: return super().__getattribute__(name) @@ -336,15 +348,9 @@ def build_mesh_accel(state): def create_module(state): - if DEBUG: - exception_flags=ox.ExceptionFlags.DEBUG | ox.ExceptionFlags.TRACE_DEPTH | ox.ExceptionFlags.STACK_OVERFLOW, - else: - exception_flags=ox.ExceptionFlags.NONE - - print("Triangle value", ox.PrimitiveTypeFlags.TRIANGLE.value) pipeline_opts = ox.PipelineCompileOptions( uses_motion_blur=False, - uses_primitive_type_flags =ox.PrimitiveTypeFlags.TRIANGLE, + uses_primitive_type_flags=ox.PrimitiveTypeFlags.TRIANGLE, traversable_graph_flags=ox.TraversableGraphFlags.ALLOW_SINGLE_LEVEL_INSTANCING, exception_flags=exception_flags, num_payload_values=3, @@ -353,8 +359,7 @@ def create_module(state): compile_opts = ox.ModuleCompileOptions( max_register_count=ox.ModuleCompileOptions.DEFAULT_MAX_REGISTER_COUNT, - opt_level=ox.CompileOptimizationLevel.DEFAULT, - debug_level=ox.CompileDebugLevel.MODERATE) + opt_level=opt_level, debug_level=debug_level) cuda_source = os.path.join(script_dir, 'cuda', 'dynamic_geometry.cu') state.module = ox.Module(state.ctx, cuda_source, compile_opts, pipeline_opts) @@ -371,7 +376,7 @@ def create_pipeline(state): program_grps = [state.raygen_grp, state.miss_grp, state.hit_grp] link_opts = ox.PipelineLinkOptions(max_trace_depth=1, - debug_level=ox.CompileDebugLevel.MODERATE) + debug_level=debug_level) pipeline = ox.Pipeline(state.ctx, compile_options=state.pipeline_opts, @@ -414,7 +419,7 @@ def create_sbt(state): animation_time = 1.0 buffer_format = BufferImageFormat.UCHAR4 - output_buffer_type = CudaOutputBufferType.CUDA_DEVICE + output_buffer_type = CudaOutputBufferType.enable_gl_interop() init_camera_state(state) create_context(state) @@ -479,7 +484,7 @@ def create_sbt(state): glfw.swap_buffers(window) - state.params.subframe_index = state.params.subframe_index.item() + 1 + state.params.subframe_index = state.params.subframe_index+ 1 impl.shutdown() glfw.terminate() diff --git a/examples/dynamic_materials.py b/examples/dynamic_materials.py index 5619419..9303665 100644 --- a/examples/dynamic_materials.py +++ b/examples/dynamic_materials.py @@ -38,7 +38,9 @@ def __init__(self): def __getattribute__(self, name): if name in Params._params.keys(): - return self.__dict__['handle'][name] + item = self.__dict__['handle'][name] + if isinstance(item, np.ndarray) and item.shape in ((0,), (1,)): + return item.item() else: return super().__getattribute__(name) diff --git a/examples/hello.py b/examples/hello.py index e79fdaa..a7f5422 100644 --- a/examples/hello.py +++ b/examples/hello.py @@ -1,15 +1,19 @@ +import os, sys, logging import optix as ox import cupy as cp import numpy as np from PIL import Image, ImageOps -import logging -import sys + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) log = logging.getLogger() +script_dir = os.path.dirname(__file__) +cuda_src = os.path.join(script_dir, "cuda", "hello.cu") + + def create_module(ctx, pipeline_opts): compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.FULL, opt_level=ox.CompileOptimizationLevel.LEVEL_0) - module = ox.Module(ctx, 'cuda/hello.cu', compile_opts, pipeline_opts) + module = ox.Module(ctx, cuda_src, compile_opts, pipeline_opts) return module diff --git a/examples/opacity_micromap.py b/examples/opacity_micromap.py new file mode 100644 index 0000000..8478e93 --- /dev/null +++ b/examples/opacity_micromap.py @@ -0,0 +1,308 @@ +# +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# +# ----------------------------------------------------------------------------- +# +# A simple demonstration of opacity micromaps. +# +# * A single quad, made of two triangles ABC and ACD is rendered with a +# transparent circular cutout at its center. +# * OMMs are applied to the two triangles to accelerate the evaluation of the +# opacity function during traversal. +# * As a preproces, OMM microtriangles are marked as either completely +# transparent, completely opaque, or unknown. +# * During traversal, rays that hit opaque or transparent regions of the OMM +# can skip the anyhit function. +# * Rays that hit 'unknown' regions of the OMM evaluate the anyhit to get +# accurate evaluation of the opacity function. +# * Regions of the micromap which are unknown are tinted a lighter color to +# visualize the regions which required anyhit evaluation. +# +# ----------------------------------------------------------------------------- + + +import os, sys, logging, collections + +import cupy as cp +import numpy as np +import optix as ox +from sutil.camera import Camera +from PIL import Image, ImageOps + +script_dir = os.path.dirname(os.path.abspath(__file__)) +cuda_src = os.path.join(script_dir, "cuda", "opacity_micromap.cu") + + +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) +log = logging.getLogger() + +DEBUG = False + +OMM_SUBDIV_LEVEL = 4 +NUM_TRIS = 2 +DEFAULT_WIDTH = 1024 +DEFAULT_HEIGHT = 768 +CIRCLE_RADIUS = 0.75 + +g_uvs = np.array([[[1.0, -1.0], [-1.0, -1.0], [-1.0, 1.0]], + [[1.0, -1.0], [-1.0, 1.0], [1.0, 1.0]]], dtype=np.float32) + +d_uvs = cp.asarray(g_uvs) + +vertices = np.array([[-0.5, -0.5, 0.0], + [ 0.5, -0.5, 0.0], + [ 0.5, 0.5, 0.0], + + [-0.5, -0.5, 0.0], + [ 0.5, 0.5, 0.0], + [-0.5, 0.5, 0.0]], dtype=np.float32) + + +class Params: + _params = collections.OrderedDict([ + ('image', 'u8'), + ('image_width', 'u4'), + ('image_height', 'u4'), + ('cam_eye', '3f4'), + ('camera_u', '3f4'), + ('camera_v', '3f4'), + ('camera_w', '3f4'), + ('trav_handle', 'u8'), + ]) + + def __init__(self): + self.handle = ox.LaunchParamsRecord(names=tuple(self._params.keys()), + formats=tuple(self._params.values())) + + def __getattribute__(self, name): + if name in Params._params.keys(): + return self.__dict__['handle'][name] + else: + return super().__getattribute__(name) + + def __setattr__(self, name, value): + if name in Params._params.keys(): + self.handle[name] = value + elif name in {'handle'}: + super().__setattr__(name, value) + else: + raise AttributeError(name) + + def __str__(self): + return '\n'.join(f'{k}: {self.handle[k]}' for k in self._params) + + +##------------------------------------------------------------------------------ +## +## Helper Functions +## +##------------------------------------------------------------------------------ + + +def init_camera(params): + camera = Camera() + camera.eye = (0, 0, 1.5) + camera.look_at = (0, 0, 0) + camera.up = (0, 1, 3) + camera.fov_y = 45 + camera.aspect_ratio = params.image_width / params.image_height + + u, v, w = camera.uvw_frame() + params.camera_u = u + params.camera_v = v + params.camera_w = w + params.cam_eye = camera.eye + + +def compute_uv(bary, uv0, uv1, uv2): + bary = bary[np.newaxis, :, np.newaxis] + return (1.0 - bary[..., 0] - bary[..., 1]) * uv0[:, np.newaxis] + bary[..., 0] * uv1[:, np.newaxis] + bary[..., 1] * uv2[:, np.newaxis] + + +def in_circle(uv, radius): + return (uv[..., 0] * uv[..., 0] + uv[..., 1] * uv[..., 1]) < (radius * radius) + + +def evaluate_opacity(bary0, bary1, bary2, uvs, radius): + """ + Calculate the texture coordinate at the micromesh vertices of the triangle and + determine if the triangle is inside, outside, or spanning the boundary of the circle. + Note that the tex coords are in [-1, 1] and the circle is centered at uv=(0,0). + """ + + uv0 = compute_uv(bary0, uvs[:, 0, :], uvs[:, 1, :], uvs[:, 2, :]) + uv1 = compute_uv(bary1, uvs[:, 0, :], uvs[:, 1, :], uvs[:, 2, :]) + uv2 = compute_uv(bary2, uvs[:, 0, :], uvs[:, 1, :], uvs[:, 2, :]) + + in_circle0 = in_circle(uv0, radius) + in_circle1 = in_circle(uv1, radius) + in_circle2 = in_circle(uv2, radius) + + opacity = np.full_like(in_circle0, dtype=np.uint8, fill_value=ox.OpacityMicromapState.UNKNOWN_OPAQUE) + + transparent = np.logical_and.reduce((in_circle0, in_circle1, in_circle2)) + opaque = np.logical_and.reduce((~in_circle0, ~in_circle1, ~in_circle2)) + opacity[transparent] = ox.OpacityMicromapState.TRANSPARENT + opacity[opaque] = ox.OpacityMicromapState.OPAQUE + + return opacity + + +def create_opacity_micromap(ctx): + NUM_MICRO_TRIS = 1 << (OMM_SUBDIV_LEVEL * 2) + + # this has to be compressed later + uTriIs = np.arange(0, NUM_MICRO_TRIS, dtype=np.uint32) + + bary0, bary1, bary2 = ox.micromap_indices_to_base_barycentrics(uTriIs, OMM_SUBDIV_LEVEL) + omm_input_data = evaluate_opacity(bary0, bary1, bary2, g_uvs, CIRCLE_RADIUS) + + # construct the omm input from the data array (this will also bake the data into the format required by optix) + omm_input = ox.OpacityMicromapInput(omm_input_data, format=ox.OpacityMicromapFormat.FOUR_STATE) + omm = ox.OpacityMicromapArray(ctx, omm_input) + return omm + + +def create_acceleration_structure(ctx, vertices, omm): + usage_counts = [2] + index_buffer = np.array([0, 1], dtype=np.uint16) + omm_build_input = ox.BuildInputOpacityMicromap(omm, usage_counts, + ox.OpacityMicromapArrayIndexingMode.INDEXED, + index_buffer=index_buffer) + triangle_input = ox.BuildInputTriangleArray(vertices, + flags=[ox.GeometryFlags.NONE], + opacity_micromap=omm_build_input) + gas = ox.AccelerationStructure(ctx, triangle_input, compact=True) + return gas + + +def create_module(ctx, pipeline_opts): + compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.NONE, opt_level=ox.CompileOptimizationLevel.LEVEL_3) + module = ox.Module(ctx, cuda_src, compile_opts, pipeline_opts) + return module + + +def create_program_groups(ctx, module): + raygen_grp = ox.ProgramGroup.create_raygen(ctx, module, "__raygen__rg") + miss_grp = ox.ProgramGroup.create_miss(ctx, module, "__miss__ms") + hit_grp = ox.ProgramGroup.create_hitgroup(ctx, module, + entry_function_CH="__closesthit__ch", + entry_function_AH="__anyhit__opacity") + return raygen_grp, miss_grp, hit_grp + + +def create_pipeline(ctx, program_grps, pipeline_options): + link_opts = ox.PipelineLinkOptions(max_trace_depth=1, + debug_level=ox.CompileDebugLevel.NONE) + + pipeline = ox.Pipeline(ctx, + compile_options=pipeline_options, + link_options=link_opts, + program_groups=program_grps) + pipeline.compute_stack_sizes(1, # max_trace_depth + 0, # max_cc_depth + 0) # max_dc_depth + + return pipeline + + +def create_sbt(program_grps): + raygen_grp, miss_grp, hit_grp = program_grps + + raygen_sbt = ox.SbtRecord(raygen_grp) + miss_sbt = ox.SbtRecord(miss_grp, names=('bg_color',), formats=('3f4',)) + miss_sbt['bg_color'] = [0.01, 0.01, 0.01] + + hit_sbt = ox.SbtRecord(hit_grp, names=('uvs',), formats=('u8',)) + hit_sbt["uvs"] = d_uvs.data.ptr + + sbt = ox.ShaderBindingTable(raygen_record=raygen_sbt, miss_records=miss_sbt, hitgroup_records=hit_sbt) + + return sbt + + +def launch_pipeline(pipeline: ox.Pipeline, sbt, params): + img_size = (params.image_width[0], params.image_height[0]) + output_image = np.zeros(img_size + (4, ), 'B') + output_image = cp.asarray(output_image) + + params.image = output_image.data.ptr + + stream = cp.cuda.Stream() + pipeline.launch(sbt, dimensions=img_size, params=params.handle, stream=stream) + + stream.synchronize() + return cp.asnumpy(output_image) + + +if __name__ == "__main__": + logger = ox.Logger(log) + ctx = ox.DeviceContext(validation_mode=True, + log_callback_function=logger, + log_callback_level=4) + ctx.cache_enabled = False + + params = Params() + params.image_width = DEFAULT_WIDTH + params.image_height = DEFAULT_HEIGHT + + init_camera(params) + + opacity_micromap = create_opacity_micromap(ctx) + gas = create_acceleration_structure(ctx, vertices, opacity_micromap) + params.trav_handle = gas.handle + + pipeline_options = ox.PipelineCompileOptions(traversable_graph_flags=ox.TraversableGraphFlags.ALLOW_SINGLE_GAS, + num_payload_values=4, + num_attribute_values=2, + uses_motion_blur=False, + exception_flags=ox.ExceptionFlags.NONE, + uses_primitive_type_flags=ox.PrimitiveTypeFlags.TRIANGLE, + pipeline_launch_params_variable_name="params", + allow_opacity_micromaps=True) + + module = create_module(ctx, pipeline_options) + program_grps = create_program_groups(ctx, module) + pipeline = create_pipeline(ctx, program_grps, pipeline_options) + + sbt = create_sbt(program_grps) + + img = launch_pipeline(pipeline, sbt, params) + + img = img.reshape(params.image_height[0], params.image_width[0], 4) + img = ImageOps.flip(Image.fromarray(img, 'RGBA')) + img.show() + + + + + + + + + diff --git a/examples/spheres.py b/examples/spheres.py index a7776e9..c2b55d3 100644 --- a/examples/spheres.py +++ b/examples/spheres.py @@ -1,11 +1,15 @@ +import os, sys, logging import optix as ox import cupy as cp import numpy as np from PIL import Image, ImageOps -import logging -import sys + logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) log = logging.getLogger() + +script_dir = os.path.dirname(__file__) +cuda_src = os.path.join(script_dir, "cuda", "spheres.cu") + img_size = (1024, 768) def compute_spheres_bbox(centers, radii): @@ -23,7 +27,7 @@ def create_acceleration_structure(ctx, bboxes): def create_module(ctx, pipeline_opts): compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.FULL, opt_level=ox.CompileOptimizationLevel.LEVEL_0) - module = ox.Module(ctx, 'cuda/spheres.cu', compile_opts, pipeline_opts) + module = ox.Module(ctx, cuda_src, compile_opts, pipeline_opts) return module diff --git a/examples/sutil/cuda_output_buffer.py b/examples/sutil/cuda_output_buffer.py index 059fa6d..d09353f 100644 --- a/examples/sutil/cuda_output_buffer.py +++ b/examples/sutil/cuda_output_buffer.py @@ -1,4 +1,5 @@ -import enum +import sys, os, enum +from packaging import version import numpy as np import cupy as cp @@ -7,6 +8,49 @@ from .vecmath import vtype_to_dtype +try: + import cuda as _cuda + from cuda import cudart + has_cudart = True + has_gl_interop = version.parse(_cuda.__version__) >= version.parse("11.6.0") +except ImportError: + cudart = None + has_cudart = False + has_gl_interop = False + +_cuda_opengl_interop_msg = ( + "Cuda Python low level bindings v11.6.0 or later are required to enable " + f"Cuda/OpenGL interoperability.{os.linesep}You can install the missing package with:" + f"{os.linesep} {sys.executable} -m pip install --upgrade --user cuda-python" +) + +if has_cudart: + def format_cudart_err(err): + return ( + f"{cudart.cudaGetErrorName(err)[1].decode('utf-8')}({int(err)}): " + f"{cudart.cudaGetErrorString(err)[1].decode('utf-8')}" + ) + + + def check_cudart_err(args): + if isinstance(args, tuple): + assert len(args) >= 1 + err = args[0] + if len(args) == 1: + ret = None + elif len(args) == 2: + ret = args[1] + else: + ret = args[1:] + else: + ret = None + + assert isinstance(err, cudart.cudaError_t), type(err) + if err != cudart.cudaError_t.cudaSuccess: + raise RuntimeError(format_cudart_err(err)) + + return ret + class BufferImageFormat(enum.Enum): UCHAR4=0 @@ -35,11 +79,22 @@ class CudaOutputBufferType(enum.Enum): ZERO_COPY = 2, # general case, preferred for multi-gpu if not fully nvlink connected CUDA_P2P = 3, # fully connected only, preferred for fully nvlink connected + @classmethod + def enable_gl_interop(cls, fallback=True): + if has_gl_interop: + return cls.GL_INTEROP + elif fallback: + msg = _cuda_opengl_interop_msg + f"{os.linesep}Falling back to slower CUDA_DEVICE output buffer." + print(msg) + return cls.CUDA_DEVICE + else: + raise RuntimeError(_cuda_opengl_interop_msg) + class CudaOutputBuffer: __slots__ = ['_pixel_format', '_buffer_type', '_width', '_height', '_device', '_device_idx', '_device', '_stream', - '_host_buffer', '_device_buffer', '_pbo'] + '_host_buffer', '_device_buffer', '_cuda_gfx_ressource', '_pbo'] def __init__(self, buffer_type, pixel_format, width, height, device_idx=0): for attr in self.__slots__: @@ -50,6 +105,16 @@ def __init__(self, buffer_type, pixel_format, width, height, device_idx=0): self.buffer_type = buffer_type self.resize(width, height) self.stream = None + + if buffer_type is CudaOutputBufferType.GL_INTEROP: + if not has_gl_interop: + raise RuntimeError(_cuda_opengl_interop_msg) + device_count, device_ids = check_cudart_err( cudart.cudaGLGetDevices(1, cudart.cudaGLDeviceList.cudaGLDeviceListAll) ) + if device_count <= 0: + raise RuntimeError("No OpenGL device found, cannot enable GL_INTEROP.") + elif device_ids[0] != device_idx: + raise RuntimeError(f"OpenGL device id {device_ids[0]} does not match requested " + f"device index {device_idx} for Cuda/OpenGL interop.") self._reallocate_buffers() @@ -69,13 +134,29 @@ def map(self): self._make_current() if (self._host_buffer is None) or (self._device_buffer is None): self._reallocate_buffers() - return self._device_buffer.data.ptr + if self.buffer_type is CudaOutputBufferType.CUDA_DEVICE: + return self._device_buffer.data.ptr + elif self.buffer_type is CudaOutputBufferType.GL_INTEROP: + check_cudart_err( + cudart.cudaGraphicsMapResources(1, self._cuda_gfx_ressource, self._stream.ptr) + ) + ptr, size = check_cudart_err( + cudart.cudaGraphicsResourceGetMappedPointer(self._cuda_gfx_ressource) + ) + return ptr + else: + msg = f'Buffer type {self.buffer_type} has not been implemented yet.' + raise NotImplementedError(msg) def unmap(self): self._make_current() buffer_type = self.buffer_type if buffer_type is CudaOutputBufferType.CUDA_DEVICE: self._stream.synchronize() + elif buffer_type is CudaOutputBufferType.GL_INTEROP: + check_cudart_err( + cudart.cudaGraphicsUnmapResources(1, self._cuda_gfx_ressource, self._stream.ptr) + ) else: msg = f'Buffer type {buffer_type} has not been implemented yet.' raise NotImplementedError(msg) @@ -85,12 +166,13 @@ def get_pbo(self): self._make_current() - if self._pbo is None: - self._pbo = gl.glGenBuffers(1) - if buffer_type is CudaOutputBufferType.CUDA_DEVICE: + if self._pbo is None: + self._pbo = gl.glGenBuffers(1) self.copy_device_to_host() self.copy_host_to_pbo() + elif buffer_type is CudaOutputBufferType.GL_INTEROP: + assert self._pbo is not None else: msg = f'Buffer type {buffer_type} has not been implemented yet.' raise NotImplementedError(msg) @@ -121,14 +203,26 @@ def _reallocate_buffers(self): dtype = self.pixel_format shape = (self.height, self.width) + + self._host_buffer = np.empty(shape=shape, dtype=dtype) if buffer_type is CudaOutputBufferType.CUDA_DEVICE: - self._host_buffer = np.empty(shape=shape, dtype=dtype) self._device_buffer = cp.empty(shape=shape, dtype=dtype) if self._pbo is not None: gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._pbo) gl.glBufferData(gl.GL_ARRAY_BUFFER, self._host_buffer, gl.GL_STREAM_DRAW) gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0) + elif buffer_type is CudaOutputBufferType.GL_INTEROP: + self._pbo = gl.glGenBuffers(1) if self._pbo is None else self._pbo + + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._pbo) + gl.glBufferData(gl.GL_ARRAY_BUFFER, self.width*self.height*dtype.itemsize, None, gl.GL_STREAM_DRAW) + gl.glBindBuffer(gl.GL_ARRAY_BUFFER, 0) + + self.cuda_gfx_ressource = check_cudart_err( + cudart.cudaGraphicsGLRegisterBuffer(self._pbo, + cudart.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsWriteDiscard) + ) else: msg = f'Buffer type {buffer_type} has not been implemented yet.' raise NotImplementedError(msg) @@ -215,3 +309,15 @@ def _set_stream(self, value): assert isinstance(value, cp.cuda.Stream), type(value) self._stream = value stream = property(_get_stream, _set_stream) + + def _get_cuda_gfx_ressource(self): + assert self._cuda_gfx_ressource is not None + return self._cuda_gfx_ressource + def _set_cuda_gfx_ressource(self, value): + if (self._cuda_gfx_ressource is not None) and (self._cuda_gfx_ressource != value): + check_cudart_err( + cudart.cudaGraphicsUnregisterResource(self._cuda_gfx_ressource) + ) + self._cuda_gfx_ressource = value + + cuda_gfx_ressource = property(_get_cuda_gfx_ressource, _set_cuda_gfx_ressource) diff --git a/examples/sutil/gl_display.py b/examples/sutil/gl_display.py index bee2939..280cf1e 100644 --- a/examples/sutil/gl_display.py +++ b/examples/sutil/gl_display.py @@ -49,7 +49,6 @@ class GLDisplay: '_quad_vertex_buffer', '_image_format'] def __init__(self, image_format): - print(image_format, type(image_format), isinstance(BufferImageFormat.UCHAR4, BufferImageFormat)) assert isinstance(image_format, BufferImageFormat) vertex_array = gl.glGenVertexArrays(1) diff --git a/examples/triangle.py b/examples/triangle.py index b9d7bb3..2b319fa 100644 --- a/examples/triangle.py +++ b/examples/triangle.py @@ -1,8 +1,12 @@ +import os import optix as ox import cupy as cp import numpy as np from PIL import Image, ImageOps +script_dir = os.path.dirname(__file__) +cuda_src = os.path.join(script_dir, "cuda", "triangle.cu") + img_size = (1024, 768) # use a regular function for logging @@ -19,7 +23,7 @@ def create_acceleration_structure(ctx, vertices): def create_module(ctx, pipeline_opts): compile_opts = ox.ModuleCompileOptions(debug_level=ox.CompileDebugLevel.FULL, opt_level=ox.CompileOptimizationLevel.LEVEL_0) - module = ox.Module(ctx, 'cuda/triangle.cu', compile_opts, pipeline_opts) + module = ox.Module(ctx, cuda_src, compile_opts, pipeline_opts) return module diff --git a/optix/__init__.py b/optix/__init__.py index 9a364ce..41d1015 100644 --- a/optix/__init__.py +++ b/optix/__init__.py @@ -1,12 +1,12 @@ -from .context import DeviceContext, optix_version +from .context import * from .build import * -from .module import Module, ModuleCompileOptions, CompileOptimizationLevel, CompileDebugLevel, PayloadSemantics, Task -from .program_group import ProgramGroup -from .struct import SbtRecord, LaunchParamsRecord -from .shader_binding_table import ShaderBindingTable -from .pipeline import CompileDebugLevel, ExceptionFlags, TraversableGraphFlags, \ - PrimitiveTypeFlags, PipelineCompileOptions, PipelineLinkOptions, Pipeline +from .module import * +from .program_group import * +from .struct import * +from .shader_binding_table import * +from .pipeline import * from .denoiser import * +from .micromap import * from .logging_utility import Logger from ._version import __version__ diff --git a/optix/_version.py b/optix/_version.py index 3dc1f76..5becc17 100644 --- a/optix/_version.py +++ b/optix/_version.py @@ -1 +1 @@ -__version__ = "0.1.0" +__version__ = "1.0.0" diff --git a/optix/base.pyx b/optix/base.pyx index 0173530..d9fc39a 100644 --- a/optix/base.pyx +++ b/optix/base.pyx @@ -9,4 +9,4 @@ cdef class OptixObject: return "" def __repr__(self): - return f"" \ No newline at end of file + return f"optix.{self.__class__.__name__}({self._repr_details()})" \ No newline at end of file diff --git a/optix/build.pxd b/optix/build.pxd index 895d613..74570bb 100644 --- a/optix/build.pxd +++ b/optix/build.pxd @@ -2,7 +2,8 @@ from .common cimport OptixResult, CUstream, CUdeviceptr from .context cimport OptixDeviceContext, OptixContextObject from libcpp.vector cimport vector from .base cimport OptixObject -from libc.stdint cimport uintptr_t +from libc.stdint cimport uintptr_t, uint32_t +from .micromap cimport OptixBuildInputOpacityMicromap, BuildInputOpacityMicromap cdef extern from "optix.h" nogil: @@ -14,6 +15,8 @@ cdef extern from "optix.h" nogil: OPTIX_BUILD_FLAG_PREFER_FAST_BUILD, OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS, OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS, + OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE, + OPTIX_BUILD_FLAG_ALLOW_DISABLE_OPACITY_MICROMAPS cdef enum OptixBuildOperation: @@ -33,24 +36,27 @@ cdef extern from "optix.h" nogil: float timeBegin float timeEnd + cdef struct OptixAccelBuildOptions: unsigned int buildFlags OptixBuildOperation operation OptixMotionOptions motionOptions + cdef enum OptixBuildInputType: OPTIX_BUILD_INPUT_TYPE_TRIANGLES, OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES, OPTIX_BUILD_INPUT_TYPE_INSTANCES, OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS, - OPTIX_BUILD_INPUT_TYPE_CURVES + OPTIX_BUILD_INPUT_TYPE_CURVES, + OPTIX_BUILD_INPUT_TYPE_SPHERES - #ctypedef uintptr_t CUdeviceptr cdef struct OptixBuildInputInstanceArray: CUdeviceptr instances unsigned int numInstances + cdef struct OptixAabb: float minX float minY @@ -59,6 +65,7 @@ cdef extern from "optix.h" nogil: float maxY float maxZ + cdef struct OptixBuildInputCustomPrimitiveArray: const CUdeviceptr * aabbBuffers unsigned int numPrimitives @@ -70,62 +77,45 @@ cdef extern from "optix.h" nogil: unsigned int sbtIndexOffsetStrideInBytes unsigned int primitiveIndexOffset - IF _OPTIX_VERSION_MAJOR == 7 and _OPTIX_VERSION_MINOR > 3: # switch to new instance flags - cdef enum OptixPrimitiveType: - OPTIX_PRIMITIVE_TYPE_CUSTOM, - OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM, - OPTIX_PRIMITIVE_TYPE_TRIANGLE, - - cdef enum OptixCurveEndcapFlags: - OPTIX_CURVE_ENDCAP_DEFAULT, - OPTIX_CURVE_ENDCAP_ON - - cdef struct OptixBuildInputCurveArray: - OptixPrimitiveType curveType - unsigned int numPrimitives - const CUdeviceptr * vertexBuffers - unsigned int numVertices - unsigned int vertexStrideInBytes - const CUdeviceptr * widthBuffers - unsigned int widthStrideInBytes - const CUdeviceptr * normalBuffers - unsigned int normalStrideInBytes - CUdeviceptr indexBuffer - unsigned int indexStrideInBytes - unsigned int flag - unsigned int primitiveIndexOffset - unsigned int endcapFlags - ELSE: - cdef enum OptixPrimitiveType: - OPTIX_PRIMITIVE_TYPE_CUSTOM, - OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_TRIANGLE, - - cdef struct OptixBuildInputCurveArray: - OptixPrimitiveType curveType - unsigned int numPrimitives - const CUdeviceptr * vertexBuffers - unsigned int numVertices - unsigned int vertexStrideInBytes - const CUdeviceptr * widthBuffers - unsigned int widthStrideInBytes - const CUdeviceptr * normalBuffers - unsigned int normalStrideInBytes - CUdeviceptr indexBuffer - unsigned int indexStrideInBytes - unsigned int flag - unsigned int primitiveIndexOffset + + cdef enum OptixPrimitiveType: + OPTIX_PRIMITIVE_TYPE_CUSTOM, + OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, + OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM, + OPTIX_PRIMITIVE_TYPE_SPHERE, + OPTIX_PRIMITIVE_TYPE_TRIANGLE, + + + cdef enum OptixCurveEndcapFlags: + OPTIX_CURVE_ENDCAP_DEFAULT, + OPTIX_CURVE_ENDCAP_ON + + + cdef struct OptixBuildInputCurveArray: + OptixPrimitiveType curveType + unsigned int numPrimitives + const CUdeviceptr * vertexBuffers + unsigned int numVertices + unsigned int vertexStrideInBytes + const CUdeviceptr * widthBuffers + unsigned int widthStrideInBytes + const CUdeviceptr * normalBuffers + unsigned int normalStrideInBytes + CUdeviceptr indexBuffer + unsigned int indexStrideInBytes + unsigned int flag + unsigned int primitiveIndexOffset + unsigned int endcapFlags + cdef enum OptixIndicesFormat: OPTIX_INDICES_FORMAT_NONE, OPTIX_INDICES_FORMAT_UNSIGNED_SHORT3, OPTIX_INDICES_FORMAT_UNSIGNED_INT3 + cdef enum OptixVertexFormat: OPTIX_VERTEX_FORMAT_NONE, OPTIX_VERTEX_FORMAT_FLOAT3, @@ -135,14 +125,18 @@ cdef extern from "optix.h" nogil: OPTIX_VERTEX_FORMAT_SNORM16_3, OPTIX_VERTEX_FORMAT_SNORM16_2 + cdef enum OptixTransformFormat: OPTIX_TRANSFORM_FORMAT_NONE, OPTIX_TRANSFORM_FORMAT_MATRIX_FLOAT12, + cdef enum OptixGeometryFlags: OPTIX_GEOMETRY_FLAG_NONE, OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT, OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL + OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING + cdef struct OptixBuildInputTriangleArray: const CUdeviceptr * vertexBuffers @@ -161,33 +155,57 @@ cdef extern from "optix.h" nogil: unsigned int sbtIndexOffsetStrideInBytes unsigned int primitiveIndexOffset OptixTransformFormat transformFormat + OptixBuildInputOpacityMicromap opacityMicromap + + + cdef struct OptixBuildInputSphereArray: + const CUdeviceptr* vertexBuffers + unsigned int vertexStrideInBytes + unsigned int numVertices + const CUdeviceptr *radiusBuffers + unsigned int radiusStrideInBytes + int singleRadius + const unsigned int *flags + unsigned int numSbtRecords + CUdeviceptr sbtIndexOffsetBuffer + unsigned int sbtIndexOffsetSizeInBytes + unsigned int sbtIndexOffsetStrideInBytes + unsigned int primitiveIndexOffset + cdef struct OptixBuildInput: OptixBuildInputType type # union OptixBuildInputTriangleArray triangleArray OptixBuildInputCurveArray curveArray + OptixBuildInputSphereArray sphereArray OptixBuildInputCustomPrimitiveArray customPrimitiveArray OptixBuildInputInstanceArray instanceArray + cdef struct OptixAccelBufferSizes: size_t outputSizeInBytes size_t tempSizeInBytes size_t tempUpdateSizeInBytes + cdef enum OptixAccelPropertyType: OPTIX_PROPERTY_TYPE_COMPACTED_SIZE, OPTIX_PROPERTY_TYPE_AABBS, + cdef struct OptixAccelEmitDesc: CUdeviceptr result OptixAccelPropertyType type + ctypedef uintptr_t OptixTraversableHandle - cdef struct OptixAccelRelocationInfo: + + cdef struct OptixRelocationInfo: unsigned long long info[4] + cdef enum OptixTraversableType: OPTIX_TRAVERSABLE_TYPE_STATIC_TRANSFORM, OPTIX_TRAVERSABLE_TYPE_MATRIX_MOTION_TRANSFORM, @@ -200,6 +218,8 @@ cdef extern from "optix.h" nogil: OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT + OPTIX_INSTANCE_FLAG_FORCE_OPACITY_MICROMAP_2_STATE + OPTIX_INSTANCE_FLAG_DISABLE_OPACITY_MICROMAPS cdef struct OptixInstance: @@ -210,6 +230,27 @@ cdef extern from "optix.h" nogil: unsigned int flags OptixTraversableHandle traversableHandle + + cdef struct OptixRelocateInputInstanceArray: + unsigned int numInstances + CUdeviceptr traversableHandles + + + cdef struct OptixRelocateInputOpacityMicromap: + CUdeviceptr opacityMicromapArray + + + cdef struct OptixRelocateInputTriangleArray: + unsigned int numSbtRecords + OptixRelocateInputOpacityMicromap opacityMicromap + + + cdef struct OptixRelocateInput: + OptixBuildInputType type + OptixRelocateInputInstanceArray instanceArray + OptixRelocateInputTriangleArray triangleArray + + OptixResult optixAccelComputeMemoryUsage(OptixDeviceContext context, const OptixAccelBuildOptions * accelOptions, const OptixBuildInput * buildInputs, @@ -232,6 +273,7 @@ cdef extern from "optix.h" nogil: unsigned int numEmittedProperties ) + OptixResult optixAccelCompact(OptixDeviceContext context, CUstream stream, OptixTraversableHandle inputHandle, @@ -240,33 +282,39 @@ cdef extern from "optix.h" nogil: OptixTraversableHandle * outputHandle ) + OptixResult optixAccelRelocate(OptixDeviceContext context, CUstream stream, - const OptixAccelRelocationInfo * info, - CUdeviceptr instanceTraversableHandles, - size_t numInstanceTraversableHandles, + const OptixRelocationInfo * info, + const OptixRelocateInput * relocateInputs, + size_t numRelocateInputs, CUdeviceptr targetAccel, size_t targetAccelSizeInBytes, OptixTraversableHandle * targetHandle ) - OptixResult optixAccelCheckRelocationCompatibility(OptixDeviceContext context, - const OptixAccelRelocationInfo * info, + + OptixResult optixCheckRelocationCompatibility(OptixDeviceContext context, + const OptixRelocationInfo * info, int * compatible ) + OptixResult optixAccelGetRelocationInfo(OptixDeviceContext context, OptixTraversableHandle handle, - OptixAccelRelocationInfo * info + OptixRelocationInfo * info ) + OptixResult optixConvertPointerToTraversableHandle(OptixDeviceContext onDevice, CUdeviceptr pointer, OptixTraversableType traversableType, OptixTraversableHandle * traversableHandle ) + cdef class BuildInputArray(OptixObject): + cdef OptixBuildInputType build_input_type cdef void prepare_build_input(self, OptixBuildInput* build_input) except * cdef size_t num_elements(self) @@ -279,6 +327,7 @@ cdef class BuildInputTriangleArray(BuildInputArray): cdef object _d_sbt_offset_buffer cdef object _d_pre_transform cdef vector[unsigned int] _flags + cdef BuildInputOpacityMicromap c_opacity_micromap cdef class BuildInputCustomPrimitiveArray(BuildInputArray): @@ -301,6 +350,16 @@ cdef class BuildInputCurveArray(BuildInputArray): cdef object _d_index_buffer +cdef class BuildInputSphereArray(BuildInputArray): + cdef OptixBuildInputSphereArray build_input + cdef list _d_vertex_buffers + cdef vector[CUdeviceptr] _d_vertex_buffer_ptrs + cdef list _d_radius_buffers + cdef vector[CUdeviceptr] _d_radius_buffer_ptrs + cdef object _d_sbt_offset_buffer + cdef vector[unsigned int] _flags + + cdef class Instance(OptixObject): cdef OptixInstance instance cdef AccelerationStructure _traversable @@ -319,6 +378,8 @@ cdef class AccelerationStructure(OptixContextObject): cdef OptixAccelBufferSizes _buffer_sizes cdef object _instances cdef OptixTraversableHandle _handle + cdef list _relocate_deps + cdef void _init_build_inputs(self, build_inputs, vector[OptixBuildInput]& ret) cdef void _init_accel_options(self, size_t num_build_inputs, unsigned int build_flags, OptixBuildOperation operation, vector[OptixAccelBuildOptions]& ret) cdef void build(self, build_inputs, stream=*) diff --git a/optix/build.pyx b/optix/build.pyx index 4cdb66a..a4d9fa0 100644 --- a/optix/build.pyx +++ b/optix/build.pyx @@ -8,6 +8,8 @@ from enum import IntEnum, IntFlag from libc.string cimport memcpy, memset from libcpp.vector cimport vector from .common import round_up, ensure_iterable +import typing as typ +from .micromap cimport BuildInputOpacityMicromap, OpacityMicromapArray optix_init() @@ -17,12 +19,14 @@ __all__ = ['GeometryFlags', 'BuildInputTriangleArray', 'BuildInputCustomPrimitiveArray', 'BuildInputCurveArray', + 'BuildInputSphereArray', 'BuildInputInstanceArray', 'Instance', 'AccelerationStructure', - 'CurveEndcapFlags' + 'CurveEndcapFlags', ] + class GeometryFlags(IntEnum): """ Wraps the OptixGeometryFlags enum. @@ -30,6 +34,7 @@ class GeometryFlags(IntEnum): NONE = OPTIX_GEOMETRY_FLAG_NONE, DISABLE_ANYHIT = OPTIX_GEOMETRY_FLAG_DISABLE_ANYHIT, REQUIRE_SINGLE_ANYHIT_CALL = OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL + DISABLE_TRIANGLE_FACE_CULLING = OPTIX_GEOMETRY_FLAG_DISABLE_TRIANGLE_FACE_CULLING class BuildFlags(IntFlag): @@ -45,34 +50,22 @@ class BuildFlags(IntFlag): ALLOW_RANDOM_INSTANCE_ACCESS = OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS, -IF _OPTIX_VERSION > 70300: # switch to new instance flags - class PrimitiveType(IntEnum): - """ - Wraps the OptixPrimitiveType enum. - """ - CUSTOM = OPTIX_PRIMITIVE_TYPE_CUSTOM, - ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, - ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, - ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, - ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM, - TRIANGLE = OPTIX_PRIMITIVE_TYPE_TRIANGLE - - class CurveEndcapFlags(IntEnum): - DEFAULT = OPTIX_CURVE_ENDCAP_DEFAULT, - ON = OPTIX_CURVE_ENDCAP_ON -ELSE: - class CurveEndcapFlags(IntEnum): - DEFAULT = 0 # only for interface. Ignored for Optix versions below 7.4 - - class PrimitiveType(IntEnum): - """ - Wraps the OptixPrimitiveType enum. - """ - CUSTOM = OPTIX_PRIMITIVE_TYPE_CUSTOM, - ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, - ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, - ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR, - TRIANGLE = OPTIX_PRIMITIVE_TYPE_TRIANGLE +class PrimitiveType(IntEnum): + """ + Wraps the OptixPrimitiveType enum. + """ + CUSTOM = OPTIX_PRIMITIVE_TYPE_CUSTOM, + ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_QUADRATIC_BSPLINE, + ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_ROUND_CUBIC_BSPLINE, + ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_ROUND_LINEAR + ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_ROUND_CATMULLROM + SPHERE = OPTIX_PRIMITIVE_TYPE_SPHERE + TRIANGLE = OPTIX_PRIMITIVE_TYPE_TRIANGLE + + +class CurveEndcapFlags(IntEnum): + DEFAULT = OPTIX_CURVE_ENDCAP_DEFAULT, + ON = OPTIX_CURVE_ENDCAP_ON class InstanceFlags(IntFlag): @@ -84,18 +77,36 @@ class InstanceFlags(IntFlag): FLIP_TRIANGLE_FACING = OPTIX_INSTANCE_FLAG_FLIP_TRIANGLE_FACING, DISABLE_ANYHIT = OPTIX_INSTANCE_FLAG_DISABLE_ANYHIT, ENFORCE_ANYHIT = OPTIX_INSTANCE_FLAG_ENFORCE_ANYHIT, + FORCE_OPACITY_MICROMAP_2_STATE = OPTIX_INSTANCE_FLAG_FORCE_OPACITY_MICROMAP_2_STATE, + DISABLE_OPACITY_MICROMAPS = OPTIX_INSTANCE_FLAG_DISABLE_OPACITY_MICROMAPS + + +class BuildInputType(IntEnum): + TRIANGLES = OPTIX_BUILD_INPUT_TYPE_TRIANGLES, + CUSTOM_PRIMITIVES = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES, + INSTANCES = OPTIX_BUILD_INPUT_TYPE_INSTANCES, + INSTANCE_POINTERS = OPTIX_BUILD_INPUT_TYPE_INSTANCE_POINTERS, + CURVES = OPTIX_BUILD_INPUT_TYPE_CURVES, + SPHERES = OPTIX_BUILD_INPUT_TYPE_SPHERES cdef class BuildInputArray(OptixObject): """ Base class for all BuildInput Arrays. This is an internal class. """ + def __init__(self, type): + self.build_input_type = (BuildInputType(type).value) + cdef void prepare_build_input(self, OptixBuildInput* build_input) except *: pass cdef size_t num_elements(self): return 0 + @property + def type(self): + return BuildInputType(self.build_input_type) + cdef class BuildInputTriangleArray(BuildInputArray): """ @@ -128,9 +139,9 @@ cdef class BuildInputTriangleArray(BuildInputArray): flags = None, sbt_record_offset_buffer = None, pre_transform = None, - primitive_index_offset = 0 - ): - + primitive_index_offset = 0, + opacity_micromap: typ.Optional[BuildInputOpacityMicromap] = None): + super().__init__(BuildInputType.TRIANGLES) self._d_vertex_buffers = [cp.asarray(vb) for vb in ensure_iterable(vertex_buffers)] self._d_vertex_buffer_ptrs.reserve(len(self._d_vertex_buffers)) @@ -164,6 +175,7 @@ cdef class BuildInputTriangleArray(BuildInputArray): self.build_input.indexStrideInBytes = 0 self.build_input.numIndexTriplets = 0 self.build_input.indexBuffer = 0 + self._d_index_buffer = None self.build_input.numSbtRecords = num_sbt_records @@ -201,11 +213,16 @@ cdef class BuildInputTriangleArray(BuildInputArray): self.build_input.preTransform = 0 self.build_input.transformFormat = OPTIX_TRANSFORM_FORMAT_NONE + self.c_opacity_micromap = opacity_micromap + if self.c_opacity_micromap is not None: + self.build_input.opacityMicromap = self.c_opacity_micromap.build_input + + def __dealloc__(self): pass cdef void prepare_build_input(self, OptixBuildInput* build_input) except *: - build_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES + build_input.type = self.build_input_type build_input.triangleArray = self.build_input def _vertex_format(self, dtype, shape): @@ -239,6 +256,19 @@ cdef class BuildInputTriangleArray(BuildInputArray): cdef size_t num_elements(self): return self.build_input.numVertices + @property + def micromap(self): + return self.c_opacity_micromap + + @property + def num_sbt_records(self): + return self.build_input.numSbtRecords + + + def _repr_details(self): + return f"nvertices={self.num_elements()}, " \ + f"ntriangles={self._d_index_buffer.shape[0] if self._d_index_buffer is not None else self.num_elements() // 3}, " \ + f"n_sbt_records={self.build_input.numSbtRecords}" cdef class BuildInputCustomPrimitiveArray(BuildInputArray): """ @@ -268,6 +298,7 @@ cdef class BuildInputCustomPrimitiveArray(BuildInputArray): sbt_record_offset_buffer = None, primitive_index_offset = 0 ): + super().__init__(BuildInputType.CUSTOM_PRIMITIVES) self._d_aabb_buffers = [cp.asarray(ab, dtype=np.float32).reshape(-1, 6) for ab in aabb_buffers] self._d_aabb_buffer_ptrs.reserve(len(self._d_aabb_buffers)) @@ -318,14 +349,13 @@ cdef class BuildInputCustomPrimitiveArray(BuildInputArray): self.build_input.primitiveIndexOffset = primitive_index_offset cdef void prepare_build_input(self, OptixBuildInput * build_input) except *: - build_input.type = OPTIX_BUILD_INPUT_TYPE_CUSTOM_PRIMITIVES + build_input.type = self.build_input_type build_input.customPrimitiveArray = self.build_input cdef size_t num_elements(self): return self.build_input.numPrimitives - cdef class BuildInputCurveArray(BuildInputArray): """ BuildInputArray for curve inputs. This class wraps the OptixBuildInputCurveArray struct. @@ -360,7 +390,7 @@ cdef class BuildInputCurveArray(BuildInputArray): flags=None, primitive_index_offset=0, endcap_flags=CurveEndcapFlags.DEFAULT): - + super().__init__(BuildInputType.CURVES) self.build_input.curveType = curve_type.value self._d_vertex_buffers = [cp.asarray(vb, np.float32) for vb in ensure_iterable(vertex_buffers)] self._d_vertex_buffer_ptrs.reserve(len(self._d_vertex_buffers)) @@ -419,17 +449,124 @@ cdef class BuildInputCurveArray(BuildInputArray): self.build_input.primitiveIndexOffset = primitive_index_offset - IF _OPTIX_VERSION > 70300: - self.build_input.endcapFlags = endcap_flags # only for Optix versions >= 7.4 + self.build_input.endcapFlags = endcap_flags cdef void prepare_build_input(self, OptixBuildInput * build_input) except *: - build_input.type = OPTIX_BUILD_INPUT_TYPE_CURVES + build_input.type = self.build_input_type build_input.curveArray = self.build_input cdef size_t num_elements(self): return self.build_input.numPrimitives +cdef class BuildInputSphereArray(BuildInputArray): + """ + BuildInputArray for a sphere. This class wraps the OptixBuildInputSphereArray struct. + In Contrast to the behavior of the Optix C++ API, this Python class will automatically convert all numpy.ndarrays + to cupy.ndarrays and keep track of them. + + Parameters + ---------- + vertex_buffers: + List of vertex buffers (one for each motion step) or a single array. + All arrays will be converted to cupy.ndarrays before any further processing. + index_buffer: ndarray, optional + A single 2d array containing the indices of all triangles or None + num_sbt_records: int + The number of records in the ShaderBindingTable for this geometry + flags: GeometryFlags + Flags to use in this input for each motionstep + sbt_record_offset_buffer: ndarray, optional + Offsets into the ShaderBindingTable record for each primitive (index) or None + pre_transform: ndarray(3,4) or None + A transform to apply prior to processing + primitive_index_offset: int + The offset applied to the primitive index in device code + """ + def __init__(self, + vertex_buffers, + radius_buffers, + num_sbt_records = 1, + flags = None, + sbt_record_offset_buffer = None, + pre_transform = None, + primitive_index_offset = 0 + ): + super().__init__(BuildInputType.SPHERES) + self._d_vertex_buffers = [cp.asarray(vb) for vb in ensure_iterable(vertex_buffers)] + self._d_vertex_buffer_ptrs.reserve(len(self._d_vertex_buffers)) + + self._d_radius_buffers = [cp.asarray(vb) for vb in ensure_iterable(radius_buffers)] + self._d_radius_buffer_ptrs.reserve(len(self._d_radius_buffers)) + + if len(self._d_radius_buffers) != len(self._d_vertex_buffers): + raise ValueError("Argument radius_buffers must have the same number of arrays as vertex_buffers.") + + if len(self._d_vertex_buffers) == 0: + raise ValueError("BuildInputSphereArray cannot be empty.") + + dtype = self._d_vertex_buffers[0].dtype + shape = self._d_vertex_buffers[0].shape + strides = self._d_vertex_buffers[0].strides + + radius_dtype = self._d_radius_buffers[0].dtype + radius_shape = self._d_radius_buffers[0].shape + strides = self._d_radius_buffers[0].strides + + for vb, rb in zip(self._d_vertex_buffers, self._d_radius_buffers): + if vb.dtype != dtype or vb.shape != shape or vb.strides != strides: + raise ValueError("All vertex buffers must have the same size and dtype.") + self._d_vertex_buffer_ptrs.push_back(vb.data.ptr) + + if rb.dtype != dtype or rb.shape != shape or rb.strides != strides: + raise ValueError("All radius buffers must have the same size and dtype.") + self._d_radius_buffer_ptrs.push_back(rb.data.ptr) + + self.build_input.vertexBuffers = self._d_vertex_buffer_ptrs.const_data() + self.build_input.radiusBuffers = self._d_radius_buffer_ptrs.const_data() + + self.build_input.vertexStrideInBytes = self._d_vertex_buffers[0].strides[0] + self.build_input.radiusStrideInBytes = self._d_radius_buffers[0].strides[0] + + self.build_input.numVertices = shape[0] + self.build_input.singleRadius = 1 if self._d_radius_buffers[0].shape[0] == 1 else 0 + + self.build_input.numSbtRecords = num_sbt_records + self._flags.resize(num_sbt_records) + + if flags is None: + for i in range(num_sbt_records): + self._flags[i] = OPTIX_GEOMETRY_FLAG_NONE + else: + for i in range(num_sbt_records): + self._flags[i] = flags[i].value + + self.build_input.flags = self._flags.data() + + + if sbt_record_offset_buffer is not None: + self._d_sbt_offset_buffer = cp.asarray(sbt_record_offset_buffer).ravel() + self.build_input.sbtIndexOffsetBuffer = self._d_sbt_offset_buffer.data.ptr + itemsize = self._d_sbt_offset_buffer.itemsize + if itemsize > 4: + raise ValueError("Only 32 bit allowed at max") + self.build_input.sbtIndexOffsetSizeInBytes = itemsize + self.build_input.sbtIndexOffsetStrideInBytes = self._d_sbt_offset_buffer.strides[0] + else: + self.build_input.sbtIndexOffsetBuffer = 0 + self.build_input.sbtIndexOffsetStrideInBytes = 0 + self.build_input.sbtIndexOffsetSizeInBytes = 0 + + self.build_input.primitiveIndexOffset = primitive_index_offset + + cdef void prepare_build_input(self, OptixBuildInput * build_input) except *: + build_input.type = self.build_input_type + build_input.sphereArray = self.build_input + + cdef size_t num_elements(self): + return self.build_input.numVertices + + cdef class Instance(OptixObject): """ Class representing a single instance (another AccelerationStructure) for use in a Instance level AccelerationStructure. @@ -483,14 +620,17 @@ cdef class Instance(OptixObject): # update the handle as well self.instance.traversableHandle = self.traversable.handle - def __deepcopy__(self, memodict={}): - from copy import deepcopy + def relocate(self, + device: typ.Optional[DeviceContext] = None, + stream: typ.Optional[cp.cuda.Stream] = None): cls = self.__class__ result = cls.__new__(cls) - memodict[id(self)] = result result.instance = self.instance - result.traversable = deepcopy(self.traversable) + result.traversable = self.traversable.relocate(device=device, stream=stream) + def __deepcopy__(self, memo): + result = self.relocate() + memo[id(self)] = result return result @property @@ -516,6 +656,7 @@ cdef class BuildInputInstanceArray(BuildInputArray): A list of the Instances to use as input """ def __init__(self, instances): + super().__init__(BuildInputType.INSTANCES) instances = ensure_iterable(instances) self.instances = instances @@ -532,7 +673,7 @@ cdef class BuildInputInstanceArray(BuildInputArray): self.build_input.numInstances = len(instances) cdef void prepare_build_input(self, OptixBuildInput * build_input) except *: - build_input.type = OPTIX_BUILD_INPUT_TYPE_INSTANCES + build_input.type = self.build_input_type build_input.instanceArray = self.build_input cdef size_t num_elements(self): @@ -583,6 +724,92 @@ cdef class BuildInputInstanceArray(BuildInputArray): return cp.ndarray(shape=(3,4), dtype=np.float32, memptr=device_ptr) +cdef class RelocationDependency: + cdef OptixBuildInputType _type + + def __init__(self, type): + self._type = (BuildInputType(type).value) + + @property + def type(self): + return BuildInputType(self._type) + + cdef RelocationDependency relocate(self, device, stream): + return self + + cdef void fill_relocation_input(self, OptixRelocateInput& input): + input.type = self._type + + cdef void finalize_relocation_input(self): + pass + +cdef class RelocationInstanceDependency(RelocationDependency): + cdef object instances + cdef object d_instances + + def __init__(self, instances): + super().__init__(BuildInputType.INSTANCES) + self.instances = instances + self.d_instances = None + + cdef RelocationInstanceDependency relocate(self, device, stream): + relocated_instances = [inst.relocate(device=device, stream=stream) for inst in self.instances] + result = self.__class__(relocated_instances) + return result + + cdef void fill_relocation_input(self, OptixRelocateInput& input): + cdef vector[OptixTraversableHandle] c_instance_handles + cdef ssize_t c_instance_handles_size = 0 + cdef object d_instances + cdef size_t i + cdef CUdeviceptr d_instances_ptr = 0 + cdef vector[OptixRelocateInput] c_relocate_inputs + cdef size_t num_relocate_inputs + + input.type = self.type + input.instanceArray.numInstances = len(self.instances) + + # prepare the new instance handles for relocation by copiing them into a temporary device buffer + c_instance_handles.resize(len(self.instances)) + c_instance_handles_size = sizeof(OptixTraversableHandle) * c_instance_handles.size() + + self.d_instances = cp.cuda.alloc(c_instance_handles_size) + for i in range(c_instance_handles.size()): + c_instance_handles[i] = self.instances[i].traversable.handle + + d_instances_ptr = self.d_instances.ptr + cp.cuda.runtime.memcpy(d_instances_ptr, c_instance_handles.data(), c_instance_handles_size, + cp.cuda.runtime.memcpyHostToDevice) + c_relocate_inputs[0].instanceArray.traversableHandles = d_instances_ptr + input.instanceArray.traversableHandles = d_instances_ptr + + cdef void finalize_relocation_input(self): + self.d_instances = None # remove the temporary cuda buffer again + + +cdef class RelocationTriangleDependency(RelocationDependency): + cdef unsigned int num_sbt_records + cdef OpacityMicromapArray micromap + + def __init__(self, num_sbt_records, micromap=None): + super().__init__(BuildInputType.INSTANCES) + self.num_sbt_records = num_sbt_records + self.micromap = micromap + + cdef RelocationInstanceDependency relocate(self, device, stream): + if self.micromap is not None: + relocated_micromap = self.micromap.relocate(device=device, stream=stream) + else: + relocated_micromap = None + result = self.__class__(self.num_sbt_records, relocated_micromap) + return result + + cdef void fill_relocation_input(self, OptixRelocateInput& input): + input.type = self.type + input.triangleArray.numSbtRecords = self.num_sbt_records + input.triangleArray.opacityMicromap.opacityMicromapArray = self.micromap.d_micromap_array_buffer.ptr + + cdef class AccelerationStructure(OptixContextObject): """ Class representing a Geometry Acceleration Structure (GAS) or Instance Acceleration Structure (IAS). This wraps the OptixTraversableHandle internally and manages the ressources like @@ -605,6 +832,10 @@ cdef class AccelerationStructure(OptixContextObject): Allow for random access of the vertices in triangle geometry random_instance_access: bool Allow for random access of the instances if an IAS is built + allow_opacity_micromap_update: bool + Allows to update the opacity micromaps in this structure + allow_disable_opacity_micromaps: bool + Allows to disable the opacity micromaps for instances in this structure stream: cupy.cuda.Stream, optional Cuda stream to use. If None the default stream is used """ @@ -616,6 +847,8 @@ cdef class AccelerationStructure(OptixContextObject): prefer_fast_build=False, random_vertex_access=False, random_instance_access=False, + allow_opacity_micromap_update=False, + allow_disable_opacity_micromaps=False, stream=None): super().__init__(context) @@ -634,9 +867,15 @@ cdef class AccelerationStructure(OptixContextObject): self._build_flags |= OPTIX_BUILD_FLAG_ALLOW_RANDOM_VERTEX_ACCESS if random_instance_access: self._build_flags |= OPTIX_BUILD_FLAG_ALLOW_RANDOM_INSTANCE_ACCESS + if allow_opacity_micromap_update: + self._build_flags |= OPTIX_BUILD_FLAG_ALLOW_OPACITY_MICROMAP_UPDATE + if allow_disable_opacity_micromaps: + self._build_flags |= OPTIX_BUILD_FLAG_ALLOW_DISABLE_OPACITY_MICROMAPS + self._gas_buffer = None self._instances = None + self._relocate_deps = [] build_inputs = ensure_iterable(build_inputs) self.build(build_inputs, stream=stream) @@ -677,14 +916,23 @@ cdef class AccelerationStructure(OptixContextObject): cdef void build(self, build_inputs, stream=None): # build a single vector from all the build inputs cdef size_t inputs_size = len(build_inputs) - cdef vector[OptixBuildInput] inputs #= vector[OptixBuildInput](inputs_size) - - self._init_build_inputs(build_inputs, inputs) + cdef vector[OptixBuildInput] c_inputs #= vector[OptixBuildInput](inputs_size) + cdef size_t i - if isinstance(build_inputs[0], BuildInputInstanceArray): - if inputs_size > 1: - raise ValueError("Only a single build input allowed for instance builds") - self._instances = (build_inputs[0]).instances # keep the instances so the buffers do not get deleted + self._init_build_inputs(build_inputs, c_inputs) + + for build_input in build_inputs: + if isinstance(build_input, BuildInputInstanceArray): + relocation_dep = RelocationInstanceDependency((build_input).instances) + if inputs_size > 1: + raise ValueError("Only a single build input allowed for instance builds") + elif isinstance(build_input, BuildInputTriangleArray): + micromap = build_input.micromap + micromap_array = micromap.micromap_array if micromap is not None else None + relocation_dep = RelocationTriangleDependency(build_input.num_sbt_records, micromap=micromap_array) + else: + relocation_dep = RelocationDependency(build_input.type) + self._relocate_deps.append(relocation_dep) cdef vector[OptixAccelBuildOptions] accel_options# = vector[OptixAccelBuildOptions](inputs_size) self._init_accel_options(inputs_size, self._build_flags, OPTIX_BUILD_OPERATION_BUILD, accel_options) @@ -697,7 +945,7 @@ cdef class AccelerationStructure(OptixContextObject): optix_check_return(optixAccelComputeMemoryUsage(self.context.c_context, accel_options.data(), - inputs.data(), + c_inputs.data(), inputs_size, &self._buffer_sizes)) @@ -726,7 +974,7 @@ cdef class AccelerationStructure(OptixContextObject): optix_check_return(optixAccelBuild(self.context.c_context, c_stream, accel_options.data(), - inputs.data(), + c_inputs.data(), inputs_size, tmp_gas_buffer_ptr, self._buffer_sizes.tempSizeInBytes, @@ -770,6 +1018,8 @@ cdef class AccelerationStructure(OptixContextObject): cdef size_t inputs_size = len(build_inputs) + if inputs_size != len(self._relocate_deps): + raise ValueError("Number of build inputs given to update() must be the same as the one used to build this GAS") cdef vector[OptixBuildInput] inputs #= vector[OptixBuildInput](inputs_size) self._init_build_inputs(build_inputs, inputs) @@ -802,30 +1052,50 @@ cdef class AccelerationStructure(OptixContextObject): NULL, 0)) - def __deepcopy__(self, memodict={}): + def __deepcopy__(self, memo): + """ + Perform a deep copy of the AccelerationStructure by using the standard python copy.deepcopy function. """ - Perform a deep copy of the AccelerationStructure by using the standard python copy.deepcopy function. This method - will also handle any necessary relocation tasks required by optiX on a copy. + # relocate on the same device to perform a regular deep copy + result = self.relocate(device=None) + memo[id(self)] = result + return result + + + def relocate(self, + device: typ.Optional[DeviceContext] = None, + stream: typ.Optional[cp.cuda.Stream] = None): + """ + Relocate this acceleration structure into another copy. Usually this is equivalent to a deep copy. + Additionally, the accleration structure can be copied to a different defice by specifying the device + parameter with a different DeviceContext. Parameters ---------- - memodict - + device: + An optional DeviceContext. The resulting copy of the acceleration structure will be copied + to this device. If None, the acceleration structure's current device is used. + stream: + The stream to use for the relocation. If None, the default stream (0) is used. Returns ------- copy: AccelerationStructure - The copy of the AccelerationStructure + The copy of the AccelerationStructure on the new device """ from copy import deepcopy # relocate the optix structure - cdef OptixAccelRelocationInfo gas_info - memset(&gas_info, 0, sizeof(OptixAccelRelocationInfo)) # init struct to 0 + cdef OptixRelocationInfo gas_info + memset(&gas_info, 0, sizeof(OptixRelocationInfo)) # init struct to 0 optix_check_return(optixAccelGetRelocationInfo(self.context.c_context, self._handle, &gas_info)) + if device is None: + device = self.context + + # check if the new device is compatible with this acceleration structure cdef int compatible = 0 - optix_check_return(optixAccelCheckRelocationCompatibility(self.context.c_context, + optix_check_return(optixCheckRelocationCompatibility((device).c_context, &gas_info, &compatible)) if compatible != 1: @@ -835,47 +1105,53 @@ cdef class AccelerationStructure(OptixContextObject): cls = self.__class__ cdef AccelerationStructure result = cls.__new__(cls) - memodict[id(self)] = result - - result.context = self.context + result.context = device result._build_flags = self._build_flags result._buffer_sizes = self._buffer_sizes - result._instances = deepcopy(self._instances) # copy all instances and their AccelerationStructures first - + + #if self._instances is not None: + # result._instances = [inst.relocate(device=device, stream=stream) for inst in self._instances] # copy all instances and their AccelerationStructures first + buffer_size = round_up(self._buffer_sizes.outputSizeInBytes, 8) + 8 result._gas_buffer = cp.cuda.alloc(buffer_size) cp.cuda.runtime.memcpy(result._gas_buffer.ptr, self._gas_buffer.ptr, buffer_size, cp.cuda.runtime.memcpyDeviceToDevice) - cdef vector[OptixTraversableHandle] c_instance_handles - cdef ssize_t c_instance_handles_size = 0 - cdef object d_instances - cdef size_t i - cdef CUdeviceptr d_instances_ptr = 0 - - if result._instances is not None: - # prepare the new instance handles for relocation - c_instance_handles.resize(len(result._instances)) - c_instance_handles_size = sizeof(OptixTraversableHandle) * c_instance_handles.size() - d_instances = cp.cuda.alloc(c_instance_handles_size) - for i in range(c_instance_handles.size()): - c_instance_handles[i] = result._instances[i].traversable.handle - d_instances_ptr = d_instances.ptr - cp.cuda.runtime.memcpy(d_instances_ptr, c_instance_handles.data(), c_instance_handles_size, cp.cuda.runtime.memcpyHostToDevice) + #cdef vector[OptixTraversableHandle] c_instance_handles + #cdef ssize_t c_instance_handles_size = 0 + #cdef object d_instances + #cdef size_t i + #cdef CUdeviceptr d_instances_ptr = 0 + cdef vector[OptixRelocateInput] c_relocate_inputs + #cdef size_t num_relocate_inputs + c_relocate_inputs.resize(len(self._relocate_deps)) + + # prepare to relocate the dependencies (micromaps and instances) + result._relocate_deps = [] + for i, dep in enumerate(self._relocate_deps): + relocated_dep = dep.relocate(device, stream) + result._relocate_deps.append(relocated_dep) + relocated_dep.fill_relocation_input(c_relocate_inputs[i]) result._handle = 0 - cdef uintptr_t c_stream = 0 + + if stream is not None: + c_stream = stream.ptr + cdef OptixTraversableHandle c_handle = 0 optix_check_return(optixAccelRelocate(result.context.c_context, c_stream, &gas_info, - d_instances_ptr, - c_instance_handles_size, + &c_relocate_inputs[0], + c_relocate_inputs.size(), result._gas_buffer, self._buffer_sizes.outputSizeInBytes, &c_handle)) result._handle = c_handle + for dep in result._relocate_deps: + dep.finalize_relocation_input() + return result @property diff --git a/optix/context.pyx b/optix/context.pyx index 0049179..a2d484a 100644 --- a/optix/context.pyx +++ b/optix/context.pyx @@ -4,9 +4,9 @@ from .common cimport optix_check_return, optix_init from libc.stdint cimport uintptr_t, int32_t import cupy as cp -optix_init() +__all__ = ['optix_version', 'DeviceContext'] -OPTIX_VERSION = _OPTIX_VERSION +optix_init() def optix_version(): return _OPTIX_VERSION_MAJOR, _OPTIX_VERSION_MINOR, _OPTIX_VERSION_MICRO @@ -144,13 +144,13 @@ cdef class DeviceContext(OptixObject): """ The callback function for logging """ - return self._log_callback_function + return self._log_callback @log_callback.setter def log_callback(self, object log_callback_function): - self._log_callback_function = log_callback_function - if self._log_callback_function is not None: - optix_check_return(optixDeviceContextSetLogCallback(self.c_context, context_log_cb, self._log_callback_function, self._log_callback_level)) + self._log_callback = log_callback_function + if self._log_callback is not None: + optix_check_return(optixDeviceContextSetLogCallback(self.c_context, context_log_cb, self._log_callback, self._log_callback_level)) @property def log_callback_level(self): diff --git a/optix/denoiser.pxd b/optix/denoiser.pxd index 5172fce..661da14 100644 --- a/optix/denoiser.pxd +++ b/optix/denoiser.pxd @@ -6,20 +6,14 @@ from libc.stdint cimport uintptr_t from libcpp cimport bool cdef extern from "optix_includes.h" nogil: - IF _OPTIX_VERSION > 70300: - cdef enum OptixDenoiserModelKind: - OPTIX_DENOISER_MODEL_KIND_LDR - OPTIX_DENOISER_MODEL_KIND_HDR - OPTIX_DENOISER_MODEL_KIND_AOV - OPTIX_DENOISER_MODEL_KIND_TEMPORAL - OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV - ELSE: - cdef enum OptixDenoiserModelKind: - OPTIX_DENOISER_MODEL_KIND_LDR - OPTIX_DENOISER_MODEL_KIND_HDR - OPTIX_DENOISER_MODEL_KIND_AOV - OPTIX_DENOISER_MODEL_KIND_TEMPORAL - + cdef enum OptixDenoiserModelKind: + OPTIX_DENOISER_MODEL_KIND_LDR + OPTIX_DENOISER_MODEL_KIND_HDR + OPTIX_DENOISER_MODEL_KIND_AOV + OPTIX_DENOISER_MODEL_KIND_TEMPORAL + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV + OPTIX_DENOISER_MODEL_KIND_UPSCALE2X + OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X cdef struct OptixDenoiserOptions: unsigned int guideAlbedo @@ -30,12 +24,22 @@ cdef extern from "optix_includes.h" nogil: size_t withOverlapScratchSizeInBytes size_t withoutOverlapScratchSizeInBytes unsigned int overlapWindowSizeInPixels + size_t computeAverageColorSizeInBytes + size_t computeIntensitySizeInBytes + size_t internalGuideLayerPixelSizeInBytes + + cdef enum OptixDenoiserAlphaMode: + OPTIX_DENOISER_ALPHA_MODE_COPY, + OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV, + OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS cdef struct OptixDenoiserParams: - unsigned int denoiseAlpha + OptixDenoiserAlphaMode denoiseAlpha CUdeviceptr hdrIntensity float blendFactor CUdeviceptr hdrAverageColor + unsigned int temporalModeUsePreviousLayers + cdef enum OptixPixelFormat: OPTIX_PIXEL_FORMAT_HALF2 @@ -64,6 +68,8 @@ cdef extern from "optix_includes.h" nogil: OptixImage2D albedo OptixImage2D normal OptixImage2D flow + OptixImage2D previousOutputInternalGuideLayer + OptixImage2D outputInternalGuideLayer ctypedef struct OptixDenoiser: pass @@ -185,6 +191,9 @@ cdef class Denoiser(OptixContextObject): cdef size_t _state_size cdef object _d_scratch cdef size_t _scratch_size + cdef size_t _guide_layer_scratch_size + cdef size_t _intensity_scratch_size + cdef size_t _average_color_scratch_size cdef object _d_window cdef size_t _window_size cdef object _d_intensity diff --git a/optix/denoiser.pyx b/optix/denoiser.pyx index a751dea..7dc9f74 100644 --- a/optix/denoiser.pyx +++ b/optix/denoiser.pyx @@ -1,4 +1,5 @@ # distutils: language = c++ +import enum from .common cimport optix_check_return, optix_init from .context cimport DeviceContext @@ -7,13 +8,21 @@ import numpy as np from enum import IntEnum from libcpp.vector cimport vector from .common import ensure_iterable +from typing import Optional optix_init() __all__ = ['DenoiserModelKind', - 'Denoiser' + 'Denoiser', + 'DenoiserAlphaMode' ] +class DenoiserAlphaMode(enum.IntEnum): + COPY = OPTIX_DENOISER_ALPHA_MODE_COPY + ALPHA_AS_AOV = OPTIX_DENOISER_ALPHA_MODE_ALPHA_AS_AOV + FULL_DENOISE_PASS = OPTIX_DENOISER_ALPHA_MODE_FULL_DENOISE_PASS + + class DenoiserModelKind(IntEnum): """ Wraps the OptixDenoiserModelKind enum. @@ -22,15 +31,14 @@ class DenoiserModelKind(IntEnum): HDR = OPTIX_DENOISER_MODEL_KIND_HDR AOV = OPTIX_DENOISER_MODEL_KIND_AOV TEMPORAL = OPTIX_DENOISER_MODEL_KIND_TEMPORAL - - IF _OPTIX_VERSION > 70300: - TEMPORAL_AOV = OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV + TEMPORAL_AOV = OPTIX_DENOISER_MODEL_KIND_TEMPORAL_AOV + UPSCALE2X = OPTIX_DENOISER_MODEL_KIND_UPSCALE2X + TEMPORAL_UPSCALE2X = OPTIX_DENOISER_MODEL_KIND_TEMPORAL_UPSCALE2X def temporal_mode(self): - IF _OPTIX_VERSION > 70300: - return self == self.TEMPORAL or self == self.TEMPORAL_AOV - ELSE: - return self == self.TEMPORAL + return self == self.TEMPORAL or \ + self==self.TEMPORAL_AOV or \ + self == self.TEMPORAL_UPSCALE2X class PixelFormat(IntEnum): @@ -41,8 +49,6 @@ class PixelFormat(IntEnum): FLOAT3 = OPTIX_PIXEL_FORMAT_FLOAT3 FLOAT4 = OPTIX_PIXEL_FORMAT_FLOAT4 - - @classmethod def from_dtype_size(cls, dtype, size): try: @@ -144,6 +150,10 @@ cdef class Denoiser(OptixContextObject): self.kp_mode = kp_mode self.tile_size = tile_size self._scratch_size = 0 + + self._guide_layer_scratch_size = 0 + self._intensity_scratch_size = 0 + self._average_color_scratch_size = 0 self._state_size = 0 if model_kind is not None: @@ -188,6 +198,9 @@ cdef class Denoiser(OptixContextObject): self._state_size = return_sizes.stateSizeInBytes self._d_state = cp.cuda.alloc(return_sizes.stateSizeInBytes) + self._intensity_scratch_size = return_sizes.computeIntensitySizeInBytes + self._average_color_scratch_size = return_sizes.computeAverageColorSizeInBytes + cdef uintptr_t c_stream = 0 if stream is not None: @@ -203,8 +216,6 @@ cdef class Denoiser(OptixContextObject): self._d_scratch.ptr, self._scratch_size)) - - @classmethod def create_with_user_model(cls, DeviceContext context, unsigned char[::1] user_model not None): obj = cls(context, model_kind=None) @@ -221,9 +232,10 @@ cdef class Denoiser(OptixContextObject): normals=None, flow=None, outputs=None, - denoise_alpha=False, + denoise_alpha: DenoiserAlphaMode = DenoiserAlphaMode.COPY, blend_factor=0.0, - stream=None): + stream=None, + temporal_use_previous_layer=False): accepted_input_types = (PixelFormat.FLOAT3, PixelFormat.FLOAT3, PixelFormat.HALF3, PixelFormat.HALF4) inputs = [Image2D(inp, require_type=accepted_input_types) for inp in ensure_iterable(inputs)] @@ -284,11 +296,12 @@ cdef class Denoiser(OptixContextObject): self._init_denoiser(len(inputs), input_size, stream=stream) cdef OptixDenoiserParams params - params.denoiseAlpha = 1 if denoise_alpha else 0 params.hdrIntensity = self._d_intensity.ptr if self._d_intensity is not None else 0 params.hdrAverageColor = self._d_avg_color.ptr if self._d_avg_color is not None else 0 params.blendFactor = blend_factor + params.temporalModeUsePreviousLayers = 1 if temporal_use_previous_layer and temporal_mode else 0 + params.denoiseAlpha = denoise_alpha.value cdef uintptr_t c_stream = 0 @@ -297,13 +310,14 @@ cdef class Denoiser(OptixContextObject): # determinhe intensity and avg color if needed if self._d_intensity is not None: + optix_check_return(optixDenoiserComputeIntensity( self.denoiser, c_stream, &layers[0].input, self._d_intensity.ptr, self._d_scratch.ptr, - self._scratch_size)) + self._intensity_scratch_size)) if self._d_avg_color is not None: optix_check_return(optixDenoiserComputeAverageColor( @@ -312,7 +326,7 @@ cdef class Denoiser(OptixContextObject): &layers[0].input, self._d_avg_color, self._d_scratch.ptr, - self._scratch_size)) + self._average_color_scratch_size)) if self.tile_size is None: diff --git a/optix/micromap.pxd b/optix/micromap.pxd new file mode 100644 index 0000000..157e485 --- /dev/null +++ b/optix/micromap.pxd @@ -0,0 +1,169 @@ +from libc.stdint cimport uint32_t +from libcpp.vector cimport vector +from libcpp.pair cimport pair +from .base cimport OptixObject +from .common cimport OptixResult, CUstream, CUdeviceptr +from .context cimport OptixDeviceContext, OptixContextObject + + +cdef extern from "optix_micromap.h" nogil: + cdef packed struct float2: + float x + float y + + void optixMicromapIndexToBaseBarycentrics(uint32_t microTriangleIndex, + uint32_t subdivisionLevel, + float2& baseBarycentrics0, + float2& baseBarycentrics1, + float2& baseBarycentrics2) + + +cdef extern from "optix.h" nogil: + cdef enum OptixOpacityMicromapArrayIndexingMode: + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE, + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR, + OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED + + + cdef enum OptixOpacityMicromapFlags: + OPTIX_OPACITY_MICROMAP_FLAG_NONE, + OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_TRACE, + OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_BUILD + + + cdef enum OptixOpacityMicromapFormat: + OPTIX_OPACITY_MICROMAP_FORMAT_NONE, + OPTIX_OPACITY_MICROMAP_FORMAT_2_STATE # 0: Transparent, 1: Opaque + OPTIX_OPACITY_MICROMAP_FORMAT_4_STATE # 0: Transparent, 1: Opaque, 2: Unknown-Transparent, 3: Unknown-Opaque + + + cdef struct OptixOpacityMicromapHistogramEntry: + unsigned int count + unsigned int subdivisionLevel + OptixOpacityMicromapFormat format + + + cdef struct OptixOpacityMicromapUsageCount: + unsigned int count + unsigned int subdivisionLevel + OptixOpacityMicromapFormat format + + + cdef struct OptixOpacityMicromapDesc: + unsigned int byteOffset + unsigned short subdivisionLevel + unsigned short format + + + # get the defines for the micromap state as constant variables to access them from cython + cdef const unsigned char OPTIX_OPACITY_MICROMAP_STATE_TRANSPARENT_DEFINE "OPTIX_OPACITY_MICROMAP_STATE_TRANSPARENT" # = 0 + cdef const unsigned char OPTIX_OPACITY_MICROMAP_STATE_OPAQUE_DEFINE "OPTIX_OPACITY_MICROMAP_STATE_OPAQUE" # = 1 + cdef const unsigned char OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT_DEFINE "OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT" # = 2 + cdef const unsigned char OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE_DEFINE "OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE" # = 3 + + cdef const unsigned long long OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT "OPTIX_OPACITY_MICROMAP_ARRAY_BUFFER_BYTE_ALIGNMENT" + + cdef const int OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_TRANSPARENT_DEFINE "OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_TRANSPARENT" # = 3 + cdef const int OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_OPAQUE_DEFINE "OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_OPAQUE" # = 3 + cdef const int OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_TRANSPARENT_DEFINE "OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_TRANSPARENT" # = 3 + cdef const int OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_OPAQUE_DEFINE "OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_OPAQUE" # = 3 + + + + cdef struct OptixOpacityMicromapArrayBuildInput: + OptixOpacityMicromapFlags flags + CUdeviceptr inputBuffer + CUdeviceptr perMicromapDescBuffer + unsigned int perMicromapDescStrideInBytes + unsigned int numMicromapHistogramEntries + const OptixOpacityMicromapHistogramEntry * micromapHistogramEntries + + + cdef struct OptixBuildInputOpacityMicromap: + OptixOpacityMicromapArrayIndexingMode indexingMode + CUdeviceptr opacityMicromapArray + CUdeviceptr indexBuffer + unsigned int indexSizeInBytes + unsigned int indexStrideInBytes + unsigned int indexOffset + unsigned int numMicromapUsageCounts + const OptixOpacityMicromapUsageCount * micromapUsageCounts + + + cdef struct OptixMicromapBufferSizes: + size_t outputSizeInBytes + size_t tempSizeInBytes + + + cdef struct OptixMicromapBuffers: + CUdeviceptr output + size_t outputSizeInBytes + CUdeviceptr temp + size_t tempSizeInBytes + + + cdef struct OptixRelocateInputOpacityMicromap: + CUdeviceptr opacityMicromapArray + + + cdef struct OptixRelocationInfo: + unsigned long long info[4] + + + OptixResult optixOpacityMicromapArrayBuild(OptixDeviceContext context, + CUstream stream, + const OptixOpacityMicromapArrayBuildInput * buildInput, + const OptixMicromapBuffers * buffers) + + + OptixResult optixOpacityMicromapArrayComputeMemoryUsage(OptixDeviceContext context, + const OptixOpacityMicromapArrayBuildInput * buildInput, + OptixMicromapBufferSizes * bufferSizes) + + + OptixResult optixOpacityMicromapArrayGetRelocationInfo(OptixDeviceContext context, + CUdeviceptr opacityMicromapArray, + OptixRelocationInfo * info) + + + OptixResult optixOpacityMicromapArrayRelocate(OptixDeviceContext context, + CUstream stream, + const OptixRelocationInfo * info, + CUdeviceptr targetOpacityMicromapArray, + size_t targetOpacityMicromapArraySizeInBytes) + + + OptixResult optixCheckRelocationCompatibility(OptixDeviceContext context, + const OptixRelocationInfo * info, + int * compatible + ) +# cdef extern from "" namespace "std" nogil: +# cdef cppclass std_hash "hash"[T]: +# function() except + +# bint operator()(const T&) const + + + +cdef class OpacityMicromapInput(OptixObject): + cdef object buffer + cdef OptixOpacityMicromapFormat c_format + cdef unsigned int c_subdivision_level + + +cdef class OpacityMicromapArray(OptixContextObject): + cdef object d_micromap_array_buffer + cdef OptixOpacityMicromapFlags _build_flags + cdef size_t _buffer_size + cdef unsigned int c_num_micromaps + cdef tuple _micromap_types + + cdef void build(self, inputs, stream=*) + + +cdef class BuildInputOpacityMicromap(OptixObject): + cdef OptixBuildInputOpacityMicromap build_input + cdef OpacityMicromapArray c_micromap_array + cdef object _d_index_buffer + cdef object _usage_counts + cdef vector[OptixOpacityMicromapUsageCount] c_usage_counts + diff --git a/optix/micromap.pyx b/optix/micromap.pyx new file mode 100644 index 0000000..fa237cd --- /dev/null +++ b/optix/micromap.pyx @@ -0,0 +1,541 @@ +# distutils: language = c++ + +import numpy as np +cimport numpy as np +np.import_array() + +import cupy as cp +from .common cimport optix_check_return, optix_init + +cimport cython +from cython.operator import dereference +from libc.stdint cimport uint8_t, uint16_t, uint32_t, uintptr_t +from libcpp cimport bool +from libcpp.vector cimport vector +from libc.string cimport memset +from collections import defaultdict, namedtuple +from collections.abc import Mapping, Sequence +from enum import IntEnum, IntFlag +import typing as typ +from .common import ensure_iterable +from .context cimport DeviceContext + +optix_init() + +__all__ = ['micromap_indices_to_base_barycentrics', + 'OpacityMicromapFormat', + 'OpacityMicromapState', + 'OpacityMicromapInput', + 'OpacityMicromapArray', + 'OpacityMicromapArrayIndexingMode', + 'BuildInputOpacityMicromap'] + + +cdef bool valid_subdivision_level(uint8_t[:, :] opacity): + return (np.log2(opacity.shape[1]) / 2).is_integer() + +cdef bool is_baked(uint8_t[:, :] opacity): + return opacity[0, 0] > 3 + + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +def micromap_indices_to_base_barycentrics(uint32_t[:] indices, uint32_t subdivision_level = 0): + """ + Converts micromap triangle indices to three base-triangle barycentric coordinates of the micro triangle vertices. + The base-triangle is the triangle that the micromap is applied to. + + Parameters + ---------- + indices: Indices of the micro triangles within a micromap. + subdivision_level: Subdivision level of the micromap. + + Returns + ------- + base_barycentrics_0: Barycentric coordinates in the space of the base triangle of vertex 0 of the micro triangle. + base_barycentrics_1: Barycentric coordinates in the space of the base triangle of vertex 1 of the micro triangle. + base_barycentrics_2: Barycentric coordinates in the space of the base triangle of vertex 2 of the micro triangle. + """ + cdef Py_ssize_t num_indices = indices.shape[0] + + barycentrics_0 = np.empty((num_indices, 2), dtype=np.float32) + barycentrics_1 = np.empty((num_indices, 2), dtype=np.float32) + barycentrics_2 = np.empty((num_indices, 2), dtype=np.float32) + + cdef float[:, ::1] barycentrics_0_view = barycentrics_0 + cdef float[:, ::1] barycentrics_1_view = barycentrics_1 + cdef float[:, ::1] barycentrics_2_view = barycentrics_2 + + cdef unsigned int i + with nogil: + for i in range(num_indices): + optixMicromapIndexToBaseBarycentrics(indices[i], + subdivision_level, + dereference(&barycentrics_0_view[i, 0]), + dereference(&barycentrics_1_view[i, 0]), + dereference(&barycentrics_2_view[i, 0])) + + return barycentrics_0, barycentrics_1, barycentrics_2 + + +@cython.boundscheck(False) # Deactivate bounds checking +@cython.wraparound(False) # Deactivate negative indexing. +@cython.cdivision(True) +def bake_opacity_micromap(uint8_t[:, :] opacity, format = None): + cdef Py_ssize_t num_tris = opacity.shape[0] + cdef Py_ssize_t num_micro_tris = opacity.shape[1] + + if not valid_subdivision_level(opacity): + raise ValueError(f"Shape of input ({opacity.shape[1]}) does " + f"not correspond to a valid subdivision level") + + cdef uint8_t bits_per_state + + if format is None: + if np.any(np.ravel(opacity) > 1): + bits_per_state = 2 + else: + bits_per_state = 1 + else: + bits_per_state = OpacityMicromapFormat(format).value + + # create the array to bake the opacities into + opacity_baked = np.zeros((opacity.shape[0], opacity.shape[1] // 16 * bits_per_state), dtype=np.uint16) + cdef unsigned int bake_stride = 16 // bits_per_state + + cdef uint16_t[:, :] opacity_baked_view = opacity_baked + cdef unsigned int baked_index + cdef unsigned int i, j + with nogil: + for i in range(num_tris): + for j in range(0, num_micro_tris): + baked_index = j // bake_stride + opacity_baked_view[i, baked_index] |= opacity[i, j] << ((j%bake_stride) * bits_per_state) + + return opacity_baked, OpacityMicromapFormat(bits_per_state) + + + + +class OpacityMicromapArrayIndexingMode(IntEnum): + NONE = OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_NONE, + LINEAR = OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_LINEAR, + INDEXED = OPTIX_OPACITY_MICROMAP_ARRAY_INDEXING_MODE_INDEXED + + +class OpacityMicromapFormat(IntEnum): + NONE = OPTIX_OPACITY_MICROMAP_FORMAT_NONE, # invalid format + TWO_STATE = OPTIX_OPACITY_MICROMAP_FORMAT_2_STATE # 0: Transparent, 1: Opaque + FOUR_STATE = OPTIX_OPACITY_MICROMAP_FORMAT_4_STATE # 0: Transparent, 1: Opaque, 2: Unknown-Transparent, 3: Unknown-Opaque + + +class OpacityMicromapState(IntEnum): + """ + This enum wraps the OPTIX_OPACITY_MICROMAP_STATE_* defines from optix + """ + TRANSPARENT = OPTIX_OPACITY_MICROMAP_STATE_TRANSPARENT_DEFINE + OPAQUE = OPTIX_OPACITY_MICROMAP_STATE_OPAQUE_DEFINE + UNKNOWN_TRANSPARENT = OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_TRANSPARENT_DEFINE + UNKNOWN_OPAQUE = OPTIX_OPACITY_MICROMAP_STATE_UNKNOWN_OPAQUE_DEFINE + + def bits_per_state(self): + """ + Returns the number of bits (either 1 or 2) required to encode this micromap state. + """ + if self < 2: + return 1 + return 2 + + def format(self): + """ + Returns the OpacityMicromapFormat to encode this state. This is either + OpacityMicromapFormat.TWO_STATE or OpacityMicromapFormat.FOUR_STATE. + """ + if self < 2: + return OpacityMicromapFormat.TWO_STATE + return OpacityMicromapFormat.FOUR_STATE + + +class OpacityMicromapPredefinedIndex(IntEnum): + """ + This enum wraps the OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_* defines from optix. + It provides the indices required to mark a triangle as either fully opaque or transparent in a + OpacityMicromapBuildInput. + """ + FULLY_TRANSPARENT = OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_TRANSPARENT_DEFINE + FULLY_OPAQUE = OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_OPAQUE_DEFINE + FULLY_UNKNOWN_TRANSPARENT = OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_TRANSPARENT_DEFINE + FULLY_UNKNOWN_OPAQUE = OPTIX_OPACITY_MICROMAP_PREDEFINED_INDEX_FULLY_UNKNOWN_OPAQUE_DEFINE + + +OpacityMicromapType = namedtuple("OpacityMicromapType", ["format", "subdivision_level"]) + + +cdef class OpacityMicromapInput(OptixObject): + """ + This class is a simple wrapper around an uint8-numpy array that will convert convert it into + the format required by the optix opacity micromaps while keeping track of the bit encoding and subdivision + level. The class supports inputs in an unbaked format (uint8 array with the values in OpacityMicromapFormat) as + well as baked formats (values encoded in either 1 or 2 bits). + + Parameters + ---------- + opacity: The input array in either unbaked or baked format. + format: Optional format specifier. Required for baked format, optional for unbaked. Invalid formats are not checked + for unbaked inputs. + """ + def __init__(self, + opacity, + format: typ.Optional[OpacityMicromapFormat] = None): + buffer = np.atleast_2d(opacity) + + format = OpacityMicromapFormat(format) if format is not None else None + if is_baked(buffer): + if not format: + raise ValueError("Baked input requires a format specification") + shape_unbaked = (buffer.dtype.itemsize * 8 * buffer.shape[1]) / format.value + subdivision_level = (np.log2(shape_unbaked) / 2) + + if not subdivision_level.is_integer(): + ValueError(f"Shape of baked input ({opacity.shape[1]}) does " + f"not correspond to a valid subdivision level in given format ({format}).") + else: + subdivision_level = (np.log2(buffer.shape[1]) / 2) + buffer, fmt = bake_opacity_micromap(buffer, format) + + # elif format != fmt: + # raise ValueError(f"Attempt to bake the micromap input resulted in a different format than the given one. " + # f"{format} != {fmt}.") + + self.buffer = buffer + self.c_format = format.value + self.c_subdivision_level = subdivision_level + + @property + def format(self): + return OpacityMicromapFormat(self.c_format) + + @property + def subdivision_level(self): + return self.c_subdivision_level + + @property + def ntriangles(self): + return self.buffer.shape[0] + + @property + def nbytes(self): + return self.buffer.size * self.buffer.itemsize + + def _repr_details(self): + return f"ntriangles={self.ntriangles}, format={self.format.name}, subdivision_level={self.subdivision_level}" +# ctypedef pair[OptixOpacityMicromapFormat, int] histogram_entry +# +# cdef bint histogram_entry_hash(const histogram_entry& s) nogil: +# return std_hash[OptixOpacityMicromapFormat]()(s.first) ^ std_hash[int]()(s.second) + + + +cdef class OpacityMicromapArray(OptixContextObject): + """ + Class representing an array of opacity micromaps on the optix device. + This class wraps the internal GPU buffer containing the micromap data and serves to build the structure from + one or multiple OpactiyMircomap inputs + + Parameters + ---------- + context: + The device context to use. + inputs: + An iterable of OpacityMicromapInput or numpy ndarrays. All numpy arrays will be converted into + OpacityMicroMapInput automatically in this class. + flags: + A set of OpacityMicromapFlags to use for building this array. If None, the default OpacityMicromapFlags.NONE + is used. + stream: + Cuda stream to use for building this micromap array. If None the default stream is used. + """ + def __init__(self, + context: DeviceContext, + inputs: typ.Iterable[typ.Union[np.ndarray, OpacityMicromapInput]], + prefer_fast_build: bool = False, + stream: typ.Optional[cp.cuda.Stream] = None): + super().__init__(context) + self.d_micromap_array_buffer = None + self._micromap_types = None + self._build_flags = OPTIX_OPACITY_MICROMAP_FLAG_NONE + if prefer_fast_build: + self._build_flags = OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_BUILD + else: + self._build_flags = OPTIX_OPACITY_MICROMAP_FLAG_PREFER_FAST_TRACE + self.build(inputs, stream=stream) + + cdef void build(self, inputs, stream=None): + # convert all inputs into the correct format first + inputs = [OpacityMicromapInput(inp) if not isinstance(inp, OpacityMicromapInput) + else inp for inp in ensure_iterable(inputs)] + + cdef OptixOpacityMicromapArrayBuildInput build_input + build_input.flags = self._build_flags + + cdef size_t inputs_size_in_bytes = 0 + micromap_counts = defaultdict(lambda: 0) + micromap_types = [] + + self.c_num_micromaps = 0 + # build the histogram from the input specifications and convert it into a cpp vector to pass it to the build input + for i in inputs: + omm_type = OpacityMicromapType(i.format, i.subdivision_level) + micromap_counts[omm_type] += i.ntriangles + micromap_types.append(omm_type) + inputs_size_in_bytes += i.nbytes + self.c_num_micromaps += i.ntriangles + self._micromap_types = tuple(micromap_types) + + cdef vector[OptixOpacityMicromapHistogramEntry] histogram_entries + histogram_entries.resize(len(micromap_counts)) + build_input.numMicromapHistogramEntries = histogram_entries.size() + + for i, (k, v) in enumerate(micromap_counts.items()): + histogram_entries[i].count = v + histogram_entries[i].format = k.format.value + histogram_entries[i].subdivisionLevel = k.subdivision_level + + build_input.micromapHistogramEntries = histogram_entries.data() + del micromap_counts + + # allocate a buffer to hold all input micromaps and put it's pointer in the build input + d_input_buffer = cp.cuda.alloc(inputs_size_in_bytes) + build_input.inputBuffer = d_input_buffer.ptr + + cdef unsigned int offset = 0 + cdef vector[OptixOpacityMicromapDesc] descs + cdef uint16_t[:, :] buffer_view + cdef unsigned int t + cdef unsigned int desc_i = 0; + + descs.resize(self.c_num_micromaps) + #TODO use the actual triangles in the input array here! + # copy all input data into the device buffer + for i, inp in enumerate(inputs): + buffer_view = (inp).buffer + num_bytes = inp.nbytes + cp.cuda.runtime.memcpy(d_input_buffer.ptr + offset, + &buffer_view[0,0], + num_bytes, + cp.cuda.runtime.memcpyHostToDevice) + for t in range(inp.ntriangles): + # fill the descriptor array at the same time with to information in input + descs[desc_i].byteOffset = offset + offset += buffer_view.shape[1] * sizeof(uint16_t) + descs[desc_i].subdivisionLevel = inp.subdivision_level + descs[desc_i].format = inp.format.value + desc_i += 1 + + # copy the descriptor array onto the device + cdef size_t desc_size_in_bytes = descs.size() * sizeof(OptixOpacityMicromapDesc) + + d_desc_buffer = cp.cuda.alloc(desc_size_in_bytes) + cp.cuda.runtime.memcpy(d_desc_buffer.ptr, descs.data(), desc_size_in_bytes, cp.cuda.runtime.memcpyHostToDevice) + + build_input.perMicromapDescBuffer = d_desc_buffer.ptr + build_input.perMicromapDescStrideInBytes = 0 + + cdef OptixMicromapBufferSizes build_sizes + + optix_check_return(optixOpacityMicromapArrayComputeMemoryUsage(self.context.c_context, + &build_input, + &build_sizes)) + # TODO: do we have to align this buffer? + self.d_micromap_array_buffer = cp.cuda.alloc(build_sizes.outputSizeInBytes) + self._buffer_size = build_sizes.outputSizeInBytes + + d_temp_buffer = cp.cuda.alloc(build_sizes.tempSizeInBytes) + + cdef OptixMicromapBuffers micromap_buffers + + micromap_buffers.tempSizeInBytes = build_sizes.tempSizeInBytes + micromap_buffers.temp = d_temp_buffer.ptr + + micromap_buffers.outputSizeInBytes = build_sizes.outputSizeInBytes + micromap_buffers.output = self.d_micromap_array_buffer.ptr + + cdef uintptr_t c_stream = 0 + + if stream is not None: + c_stream = stream.ptr + with nogil: + optix_check_return(optixOpacityMicromapArrayBuild(self.context.c_context, + c_stream, + &build_input, + µmap_buffers)) + # all temporary buffers will be freed automatically here + + @property + def types(self): + return self._micromap_types + + def __deepcopy__(self, memo): + """ + Perform a deep copy of the OpactiyMicromap by using the standard python copy.deepcopy function. + """ + # relocate on the same device to perform a regular deep copy + result = self.relocate(device=None) + memo[id(self)] = result + return result + + def _repr_details(self): + return f"size={self._buffer_size}, nmicromaps={self.c_num_micromaps}" + + def relocate(self, + device: typ.Optional[DeviceContext] = None, + stream: typ.Optional[cp.cuda.Stream] = None) -> OpacityMicromapArray: + """ + Relocate this opacity micromap array into another copy. Usually this is equivalent to a deep copy. + Additionally, the micromap array can be copied to a different device by specifying the device + parameter with a different DeviceContext. + + Parameters + ---------- + device: + An optional DeviceContext. The resulting copy of the micromap array will be copied + to this device. If None, the micromap array's current device is used. + stream: + The stream to use for the relocation. If None, the default stream (0) is used. + + Returns + ------- + copy: OpacityMicromapArray + The copy of the OpacityMicromapArray on the new device + """ + # first determine the relocation info for this micromap array + cdef OptixRelocationInfo micromap_info + memset(µmap_info, 0, sizeof(OptixRelocationInfo)) # init struct to 0 + + optix_check_return(optixOpacityMicromapArrayGetRelocationInfo(self.context.c_context, + self.d_micromap_array_buffer, µmap_info)) + + if device is None: + device = self.context + + # check if the new device is compatible with this micromap array + cdef int compatible = 0 + optix_check_return(optixCheckRelocationCompatibility((device).c_context, + µmap_info, + &compatible)) + + if compatible != 1: + raise RuntimeError("Device is not compatible for relocation of opacity micromap array") + + # do the relocation + cls = self.__class__ + cdef OpacityMicromapArray result = cls.__new__(cls) + + result.context = device + result._build_flags = self._build_flags + result._buffer_size = self._buffer_size + # TODO: align this? + result._gas_buffer = cp.cuda.alloc(result._buffer_size) + cp.cuda.runtime.memcpy(result.d_micromap_array_buffer.ptr, + self.d_micromap_array_buffer.ptr, + result._buffer_size, + cp.cuda.runtime.memcpyDeviceToDevice) + + cdef uintptr_t c_stream = 0 + if stream is not None: + c_stream = stream.ptr + + optix_check_return(optixOpacityMicromapArrayRelocate(result.context.c_context, + c_stream, + µmap_info, + result.d_micromap_array_buffer.ptr, + result._buffer_size)) + + return result + + +cdef class BuildInputOpacityMicromap(OptixObject): + """ + Build input for an array of micromaps. Inputs of this type can optionally be passed to a + BuildInputTriangleArray to use micromaps for it's triangles. Additionally an array of the usage_counts + for the OMMs in the Array needs to be passed as a list. + If the indexing mode is specified as INDEXED, an additional index buffer containing an index into the omm array or one of + the values in OpacityMicromapPredefinedIndex is required. + + Parameters + ---------- + omm_array: + The OpacityMicromapArray to use by the triangles. + usage_counts: + The number of times each omm in the OpacityMicromapArray is used. + indexing_mode: + The indexing mode the omms should use. Must be one of the values in OpacityMicromapArrayIndexingMode. + By default NONE is used, disabling the use of OMMs + index_buffer: + If the indexing_mode is INDEXED, this additional index buffer, specifiing an omm to use or the default value + for each triangle in the geometry is required. + """ + def __init__(self, + OpacityMicromapArray omm_array, + usage_counts: Sequence[int], + indexing_mode: OpacityMicromapArrayIndexingMode = OpacityMicromapArrayIndexingMode.NONE, + index_buffer = None): + indexing_mode = OpacityMicromapArrayIndexingMode(indexing_mode) + + if indexing_mode == OpacityMicromapArrayIndexingMode.INDEXED: + if index_buffer is None: + raise ValueError("index_buffer is required for indexing_mode=INDEXED.") + if not any(np.issubdtype(index_buffer.dtype, dt) for dt in (np.uint16, np.uint32)): + raise ValueError("index_buffer must be of dtype np.uint16 or np.uint32") + self._d_index_buffer = cp.asarray(index_buffer).ravel() + # enable the index buffer in the build_input struct + self.build_input.indexBuffer = self._d_index_buffer.data.ptr + self.build_input.indexOffset = 0 + self.build_input.indexSizeInBytes = self._d_index_buffer.itemsize + self.build_input.indexStrideInBytes = self._d_index_buffer.strides[0] + else: + self.build_input.indexBuffer = 0 + self.build_input.indexOffset = 0 + self.build_input.indexSizeInBytes = 0 + self.build_input.indexStrideInBytes = 0 + + self.build_input.indexingMode = indexing_mode.value + self.c_micromap_array = omm_array + self.build_input.opacityMicromapArray = self.c_micromap_array.d_micromap_array_buffer.ptr + + # fill the usage counts vector from the specified usage array + # TODO: is there a way to determine the usage count automatically? + micromap_types = self.c_micromap_array.types + if len(usage_counts) != len(micromap_types): + raise IndexError(f"Number of entries in usage_count must be equal to the number of omms in micromap_array. " + f"Expected {len(micromap_types)}), got ({len(usage_counts)})") + usage_count_hist = defaultdict(lambda: 0) + + for type, usage_count in zip(micromap_types, usage_counts): + usage_count_hist[type] += usage_count + + self._usage_counts = usage_count_hist + self.c_usage_counts.resize(len(usage_counts)) + for i, (k, v) in enumerate(usage_count_hist.items()): + self.c_usage_counts[i].count = v + self.c_usage_counts[i].format = k.format.value + self.c_usage_counts[i].subdivisionLevel = k.subdivision_level + + self.build_input.micromapUsageCounts = self.c_usage_counts.data() + self.build_input.numMicromapUsageCounts = self.c_usage_counts.size() + + @property + def usage_counts(self): + return self._usage_counts + + @property + def types(self): + return self.c_micromap_array.types + + @property + def micromap_array(self): + return self.c_micromap_array + + @property + def index_buffer(self): + return self._d_index_buffer \ No newline at end of file diff --git a/optix/module.pxd b/optix/module.pxd index 94bb927..e5ba795 100644 --- a/optix/module.pxd +++ b/optix/module.pxd @@ -28,60 +28,48 @@ cdef extern from "optix_includes.h" nogil: const char* annotation - IF _OPTIX_VERSION > 70300: # switch to new version - cdef size_t OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT - - cdef enum OptixPayloadSemantics: - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, - OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, - OPTIX_PAYLOAD_SEMANTICS_CH_NONE, - OPTIX_PAYLOAD_SEMANTICS_CH_READ, - OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, - OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, - OPTIX_PAYLOAD_SEMANTICS_MS_NONE, - OPTIX_PAYLOAD_SEMANTICS_MS_READ, - OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, - OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, - OPTIX_PAYLOAD_SEMANTICS_AH_NONE, - OPTIX_PAYLOAD_SEMANTICS_AH_READ, - OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, - OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, - OPTIX_PAYLOAD_SEMANTICS_IS_NONE, - OPTIX_PAYLOAD_SEMANTICS_IS_READ, - OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, - OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE, - - cdef struct OptixPayloadType: - unsigned int numPayloadValues - const unsigned int* payloadSemantics - - cdef struct OptixModuleCompileOptions: - int maxRegisterCount - OptixCompileOptimizationLevel optLevel - OptixCompileDebugLevel debugLevel - const OptixModuleCompileBoundValueEntry* boundValues - unsigned int numBoundValues - unsigned int numPayloadTypes - OptixPayloadType* payloadTypes - - cdef struct OptixBuiltinISOptions: - OptixPrimitiveType builtinISModuleType - int usesMotionBlur - unsigned int buildFlags - unsigned int curveEndcapFlags - ELSE: - cdef struct OptixModuleCompileOptions: - int maxRegisterCount - OptixCompileOptimizationLevel optLevel - OptixCompileDebugLevel debugLevel - const OptixModuleCompileBoundValueEntry* boundValues - unsigned int numBoundValues - - cdef struct OptixBuiltinISOptions: - OptixPrimitiveType builtinISModuleType - int usesMotionBlur + cdef size_t OPTIX_COMPILE_DEFAULT_MAX_PAYLOAD_TYPE_COUNT + + cdef enum OptixPayloadSemantics: + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, + OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_CH_NONE, + OPTIX_PAYLOAD_SEMANTICS_CH_READ, + OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, + OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_MS_NONE, + OPTIX_PAYLOAD_SEMANTICS_MS_READ, + OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, + OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_AH_NONE, + OPTIX_PAYLOAD_SEMANTICS_AH_READ, + OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, + OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, + OPTIX_PAYLOAD_SEMANTICS_IS_NONE, + OPTIX_PAYLOAD_SEMANTICS_IS_READ, + OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, + OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE, + + cdef struct OptixPayloadType: + unsigned int numPayloadValues + const unsigned int* payloadSemantics + + cdef struct OptixModuleCompileOptions: + int maxRegisterCount + OptixCompileOptimizationLevel optLevel + OptixCompileDebugLevel debugLevel + const OptixModuleCompileBoundValueEntry* boundValues + unsigned int numBoundValues + unsigned int numPayloadTypes + OptixPayloadType* payloadTypes + + cdef struct OptixBuiltinISOptions: + OptixPrimitiveType builtinISModuleType + int usesMotionBlur + unsigned int buildFlags + unsigned int curveEndcapFlags OptixResult optixModuleCreateFromPTX(OptixDeviceContext context, @@ -104,52 +92,50 @@ cdef extern from "optix_includes.h" nogil: OptixModule *builtinModule) - IF _OPTIX_VERSION > 70300: # switch to new version - ctypedef struct OptixTask: - pass + ctypedef struct OptixTask: + pass - cdef enum OptixModuleCompileState: - OPTIX_MODULE_COMPILE_STATE_NOT_STARTED - OPTIX_MODULE_COMPILE_STATE_STARTED - OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE - OPTIX_MODULE_COMPILE_STATE_FAILED - OPTIX_MODULE_COMPILE_STATE_COMPLETED + cdef enum OptixModuleCompileState: + OPTIX_MODULE_COMPILE_STATE_NOT_STARTED + OPTIX_MODULE_COMPILE_STATE_STARTED + OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE + OPTIX_MODULE_COMPILE_STATE_FAILED + OPTIX_MODULE_COMPILE_STATE_COMPLETED - cdef OptixResult optixModuleGetCompilationState(OptixModule module, - OptixModuleCompileState * state) + cdef OptixResult optixModuleGetCompilationState(OptixModule module, + OptixModuleCompileState * state) - cdef OptixResult optixModuleCreateFromPTXWithTasks(OptixDeviceContext context, - const OptixModuleCompileOptions * moduleCompileOptions, - const OptixPipelineCompileOptions * pipelineCompileOptions, - const char * PTX, - size_t PTXsize, - char * logString, - size_t * logStringSize, - OptixModule * module, - OptixTask * firstTask) + cdef OptixResult optixModuleCreateFromPTXWithTasks(OptixDeviceContext context, + const OptixModuleCompileOptions * moduleCompileOptions, + const OptixPipelineCompileOptions * pipelineCompileOptions, + const char * PTX, + size_t PTXsize, + char * logString, + size_t * logStringSize, + OptixModule * module, + OptixTask * firstTask) + + cdef OptixResult optixTaskExecute(OptixTask task, + OptixTask * additionalTasks, + unsigned int maxNumAdditionalTasks, + unsigned int *numAdditionalTasksCreated) - cdef OptixResult optixTaskExecute(OptixTask task, - OptixTask * additionalTasks, - unsigned int maxNumAdditionalTasks, - unsigned int *numAdditionalTasksCreated) cdef class BuiltinISOptions(OptixObject): cdef OptixBuiltinISOptions options + cdef class Module(OptixContextObject): cdef OptixModule module cdef list _compile_flags -IF _OPTIX_VERSION > 70300: # switch to new version - cdef class ModuleCompileOptions(OptixObject): - cdef OptixModuleCompileOptions compile_options - cdef vector[OptixPayloadType] payload_types - cdef vector[vector[unsigned int]] payload_values # WTF! - - cdef class Task(OptixObject): - cdef OptixTask task - cdef Module module -ELSE: - cdef class ModuleCompileOptions(OptixObject): - cdef OptixModuleCompileOptions compile_options \ No newline at end of file + +cdef class ModuleCompileOptions(OptixObject): + cdef OptixModuleCompileOptions compile_options + cdef vector[OptixPayloadType] payload_types + cdef vector[vector[unsigned int]] payload_values # WTF! + +cdef class Task(OptixObject): + cdef OptixTask task + cdef Module module \ No newline at end of file diff --git a/optix/module.pyx b/optix/module.pyx index e991838..a5ea10b 100644 --- a/optix/module.pyx +++ b/optix/module.pyx @@ -2,7 +2,8 @@ from enum import IntEnum, IntFlag import os -from .path_utility import get_cuda_include_path, get_optix_include_path +import warnings +from .path_utility import get_cuda_include_path, get_optix_include_path, get_local_optix_include_path from .common cimport optix_check_return, optix_init from .context cimport DeviceContext from .pipeline cimport PipelineCompileOptions @@ -14,6 +15,9 @@ from libcpp.vector cimport vector optix_init() +__all__ = ['Module', 'ModuleCompileOptions', 'CompileOptimizationLevel', 'CompileDebugLevel', + 'PayloadSemantics', 'Task'] + class CompileOptimizationLevel(IntEnum): """ Wraps the OptixCompileOptimizationLevel enum @@ -24,93 +28,92 @@ class CompileOptimizationLevel(IntEnum): LEVEL_2 = OPTIX_COMPILE_OPTIMIZATION_LEVEL_2, LEVEL_3 = OPTIX_COMPILE_OPTIMIZATION_LEVEL_3, -IF _OPTIX_VERSION > 70300: - class PayloadSemantics(IntFlag): - """ - Wraps the PayloadSemantics enum. - """ +class PayloadSemantics(IntFlag): + """ + Wraps the PayloadSemantics enum. + """ + + DEFAULT = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE # allow everything as default + TRACE_CALLER_NONE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, + TRACE_CALLER_READ = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, + TRACE_CALLER_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, + TRACE_CALLER_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, + CH_NONE = OPTIX_PAYLOAD_SEMANTICS_CH_NONE, + CH_READ = OPTIX_PAYLOAD_SEMANTICS_CH_READ, + CH_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, + CH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, + MS_NONE = OPTIX_PAYLOAD_SEMANTICS_MS_NONE, + MS_READ = OPTIX_PAYLOAD_SEMANTICS_MS_READ, + MS_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, + MS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, + AH_NONE = OPTIX_PAYLOAD_SEMANTICS_AH_NONE, + AH_READ = OPTIX_PAYLOAD_SEMANTICS_AH_READ, + AH_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, + AH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, + IS_NONE = OPTIX_PAYLOAD_SEMANTICS_IS_NONE, + IS_READ = OPTIX_PAYLOAD_SEMANTICS_IS_READ, + IS_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, + IS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE + +class ModuleCompileState(IntFlag): + NOT_STARTED = OPTIX_MODULE_COMPILE_STATE_NOT_STARTED, + STARTED = OPTIX_MODULE_COMPILE_STATE_STARTED, + IMPENDING_FAILURE = OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE, + FAILED = OPTIX_MODULE_COMPILE_STATE_FAILED, + COMPLETED = OPTIX_MODULE_COMPILE_STATE_COMPLETED, + + +cdef class Task(OptixObject): + """ + Class to represent a parallel Task to compile an OptiX module. + A Task can be executed in parallel by e.g. a thread pool to handle lots of module compilations concurrently. + It is only valid as long as the corresponding module exists, therefore in this wrapper a reference to the module + if stored. + + Note, that a Task is not supposed to be created by the user directly, but provided by the create_as_task method + of the Module class. - DEFAULT = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE | OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE # allow everything as default - TRACE_CALLER_NONE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_NONE, - TRACE_CALLER_READ = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ, - TRACE_CALLER_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_WRITE, - TRACE_CALLER_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_TRACE_CALLER_READ_WRITE, - CH_NONE = OPTIX_PAYLOAD_SEMANTICS_CH_NONE, - CH_READ = OPTIX_PAYLOAD_SEMANTICS_CH_READ, - CH_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_WRITE, - CH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_CH_READ_WRITE, - MS_NONE = OPTIX_PAYLOAD_SEMANTICS_MS_NONE, - MS_READ = OPTIX_PAYLOAD_SEMANTICS_MS_READ, - MS_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_WRITE, - MS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_MS_READ_WRITE, - AH_NONE = OPTIX_PAYLOAD_SEMANTICS_AH_NONE, - AH_READ = OPTIX_PAYLOAD_SEMANTICS_AH_READ, - AH_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_WRITE, - AH_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_AH_READ_WRITE, - IS_NONE = OPTIX_PAYLOAD_SEMANTICS_IS_NONE, - IS_READ = OPTIX_PAYLOAD_SEMANTICS_IS_READ, - IS_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_WRITE, - IS_READ_WRITE = OPTIX_PAYLOAD_SEMANTICS_IS_READ_WRITE - - class ModuleCompileState(IntFlag): - NOT_STARTED = OPTIX_MODULE_COMPILE_STATE_NOT_STARTED, - STARTED = OPTIX_MODULE_COMPILE_STATE_STARTED, - IMPENDING_FAILURE = OPTIX_MODULE_COMPILE_STATE_IMPENDING_FAILURE, - FAILED = OPTIX_MODULE_COMPILE_STATE_FAILED, - COMPLETED = OPTIX_MODULE_COMPILE_STATE_COMPLETED, - - - cdef class Task(OptixObject): + Parameters + ---------- + module: Module + The module this Task belongs to. + """ + def __init__(self, Module module): + self.module = module + self.task = NULL + + def execute(self, max_additional_tasks=2): """ - Class to represent a parallel Task to compile an OptiX module. - A Task can be executed in parallel by e.g. a thread pool to handle lots of module compilations concurrently. - It is only valid as long as the corresponding module exists, therefore in this wrapper a reference to the module - if stored. + Execute the Task. If more parallel work is found, it will be returned as a new list of Task objects. + The list has a maximum size of max_additional_tasks. - Note, that a Task is not supposed to be created by the user directly, but provided by the create_as_task method - of the Module class. + Node, that each Task can only be executed by a single thread. Parameters ---------- - module: Module - The module this Task belongs to. + max_additional_tasks: int + The maximum number of new Tasks to create from this one + + Returns + ------- + tasks: List[Task] + The newly created tasks if any """ - def __init__(self, Module module): - self.module = module - self.task = NULL - - def execute(self, max_additional_tasks=2): - """ - Execute the Task. If more parallel work is found, it will be returned as a new list of Task objects. - The list has a maximum size of max_additional_tasks. - - Node, that each Task can only be executed by a single thread. - - Parameters - ---------- - max_additional_tasks: int - The maximum number of new Tasks to create from this one - - Returns - ------- - tasks: List[Task] - The newly created tasks if any - """ - cdef vector[OptixTask] additional_tasks - cdef unsigned int i - cdef unsigned int additional_tasks_created = 0 - cdef unsigned int max_num_additional_tasks = max_additional_tasks - - with nogil: - additional_tasks.resize(max_num_additional_tasks) - optix_check_return(optixTaskExecute(self.task, additional_tasks.data(), max_num_additional_tasks, &additional_tasks_created)) - - cdef list tasks = [] - for i in range(additional_tasks_created): - t = Task(self.module) - t.task = additional_tasks[i] - tasks.append(t) - return tasks + cdef vector[OptixTask] additional_tasks + cdef unsigned int i + cdef unsigned int additional_tasks_created = 0 + cdef unsigned int max_num_additional_tasks = max_additional_tasks + + with nogil: + additional_tasks.resize(max_num_additional_tasks) + optix_check_return(optixTaskExecute(self.task, additional_tasks.data(), max_num_additional_tasks, &additional_tasks_created)) + + cdef list tasks = [] + for i in range(additional_tasks_created): + t = Task(self.module) + t.task = additional_tasks[i] + tasks.append(t) + return tasks cdef class ModuleCompileOptions(OptixObject): @@ -132,25 +135,22 @@ cdef class ModuleCompileOptions(OptixObject): self.compile_options.numBoundValues = 0 self.compile_options.boundValues = NULL # currently not supported - IF _OPTIX_VERSION > 70300: - if payload_types is None: - self.compile_options.numPayloadTypes = 0 - self.compile_options.payloadTypes = NULL - else: - # set the payload types for these compile options (this is horrible, i know ;)) - payload_types = [ensure_iterable(pt) for pt in ensure_iterable(payload_types)] # list of lists - self.payload_types.resize(len(payload_types)) # the number of different payload types - self.payload_values.resize(self.payload_types.size()) # a vector of semantics for each payload type - self.compile_options.numPayloadTypes = self.payload_types.size() - for i, payload_values in enumerate(payload_types): - self.payload_types[i].numPayloadValues = len(payload_values) - self.payload_values[i].resize(self.payload_types[i].numPayloadValues) - for j, payload_semantics in enumerate(payload_values): - self.payload_values[i][j] = payload_semantics.value - self.payload_types[i].payloadSemantics = self.payload_values[i].data() - self.compile_options.payloadTypes = self.payload_types.data() - - + if payload_types is None: + self.compile_options.numPayloadTypes = 0 + self.compile_options.payloadTypes = NULL + else: + # set the payload types for these compile options (this is horrible, i know ;)) + payload_types = [ensure_iterable(pt) for pt in ensure_iterable(payload_types)] # list of lists + self.payload_types.resize(len(payload_types)) # the number of different payload types + self.payload_values.resize(self.payload_types.size()) # a vector of semantics for each payload type + self.compile_options.numPayloadTypes = self.payload_types.size() + for i, payload_values in enumerate(payload_types): + self.payload_types[i].numPayloadValues = len(payload_values) + self.payload_values[i].resize(self.payload_types[i].numPayloadValues) + for j, payload_semantics in enumerate(payload_values): + self.payload_values[i][j] = payload_semantics.value + self.payload_types[i].payloadSemantics = self.payload_values[i].data() + self.compile_options.payloadTypes = self.payload_types.data() @property def max_register_count(self): @@ -177,7 +177,7 @@ cdef class ModuleCompileOptions(OptixObject): self.compile_options.debugLevel = level.value -cdef tuple _nvrtc_compile_flags_default = ('-use_fast_math', '-lineinfo', '-default-device', '-std=c++11', '-rdc', 'true') +cdef tuple _nvrtc_compile_flags_default = ('-use_fast_math', '-default-device', '-std=c++11', '-rdc', 'true') def get_default_nvrtc_compile_flags(std=None, rdc=False): flags = list(_nvrtc_compile_flags_default[:-3]) @@ -198,25 +198,24 @@ cdef _is_ptx(src): cdef class BuiltinISOptions(OptixObject): def __init__(self, primitive_type, - build_flags=None, + build_flags, uses_motion_blur=False, curve_endcap_flags=None): self.options.builtinISModuleType = primitive_type.value self.options.usesMotionBlur = uses_motion_blur - IF _OPTIX_VERSION > 70300: - if build_flags is None: - raise ValueError("Parameter build_flags is required for OptiX versions >= 7.4.") - self.options.buildFlags = build_flags.value - if curve_endcap_flags is None: - curve_endcap_flags = CurveEndcapFlags.DEFAULT - self.options.curveEndcapFlags = curve_endcap_flags.value + self.options.buildFlags = build_flags.value + if curve_endcap_flags is None: + curve_endcap_flags = CurveEndcapFlags.DEFAULT + self.options.curveEndcapFlags = curve_endcap_flags.value cdef class Module(OptixContextObject): """ Class representing a Optix Cuda program that will be called during pipeline execution. Wraps the OptixModule struct. + TODO: support creating modules through nvcc instead of nvrtc as well to support the new optix-ir format in 7.5 + Parameters ---------- context: DeviceContext @@ -237,19 +236,23 @@ cdef class Module(OptixContextObject): src, ModuleCompileOptions module_compile_options = ModuleCompileOptions(), PipelineCompileOptions pipeline_compile_options = PipelineCompileOptions(), - compile_flags=_nvrtc_compile_flags_default, + compile_flags=None, program_name=None): super().__init__(context) cdef const char * c_ptx cdef unsigned int pipeline_payload_values, i + + if compile_flags is None: + compile_flags = _nvrtc_compile_flags_default + self._compile_flags = list(compile_flags) + if module_compile_options.debug_level != CompileDebugLevel.NONE: + self._compile_flags.append("-G") + self._compile_flags.append("-lineinfo") if src is not None: ptx = self.compile_cuda_ptx(src, compile_flags, name=program_name) c_ptx = ptx - #IF _OPTIX_VERSION > 70300: - # self._check_payload_values(module_compile_options, pipeline_compile_options) - optix_check_return(optixModuleCreateFromPTX(self.context.c_context, &module_compile_options.compile_options, &pipeline_compile_options.compile_options, @@ -258,89 +261,86 @@ cdef class Module(OptixContextObject): NULL, NULL, &self.module)) - def __dealloc__(self): if self.module != 0: optix_check_return(optixModuleDestroy(self.module)) - IF _OPTIX_VERSION > 70300: - @property - def compile_state(self): - cdef OptixModuleCompileState state - with nogil: - optix_check_return(optixModuleGetCompilationState(self.module, &state)) - return ModuleCompileState(state) - - # @staticmethod - # def _check_payload_values(ModuleCompileOptions module_compile_options, PipelineCompileOptions pipeline_compile_options): - # IF _OPTIX_VERSION > 70300: - # # check if the payload values match between the module and pipeline compile options - # pipeline_payload_values = pipeline_compile_options.compile_options.numPayloadValues - # if module_compile_options.payload_types.size() > 0: - # for i in range(module_compile_options.compile_options.numPayloadTypes): - # if pipeline_payload_values != module_compile_options.compile_options.payloadTypes[ - # i].numPayloadValues: - # raise ValueError( - # f"number of payload values in module compile options at index {i} does not match the num_payload_values in the pipeline_compile_options.") - # return - - @classmethod - def create_as_task(cls, - DeviceContext context, - src, - ModuleCompileOptions module_compile_options = ModuleCompileOptions(), - PipelineCompileOptions pipeline_compile_options = PipelineCompileOptions(), - compile_flags=_nvrtc_compile_flags_default, - program_name=None): - """ - Create a module associated with a parallel task. - The function will perform just enough work to instantiate the module. - Everything else will be done by the task on request. - - Parameters - ---------- - context: DeviceContext - The current OptiX context - src: str - Either a string containing the module's source code or PTX or the path to a file containing it. - module_compile_options: ModuleCompileOptions - Compile options of this module - pipeline_compile_options: PipelineCompileOptions - Compile options of the pipeline the module will be used in - compile_flags: list[str], optional - List of compiler flags to use. If omitted, the default flags are used. - program_name: str, optional - The name the program is given internally. Of omitted either the filename is used if given or a default name is used. - - Returns - ------- - - module: Module - The created module - task: Task - The task associated with this module - - """ - cdef Module module = Module(context, None, compile_flags=compile_flags) - cdef const char * c_ptx - cdef unsigned int pipeline_payload_values, i - #cls._check_payload_values(module_compile_options, pipeline_compile_options) - - ptx = cls.compile_cuda_ptx(src, compile_flags, name=program_name) - c_ptx = ptx + @property + def compile_state(self): + cdef OptixModuleCompileState state + with nogil: + optix_check_return(optixModuleGetCompilationState(self.module, &state)) + return ModuleCompileState(state) + + # @staticmethod + # def _check_payload_values(ModuleCompileOptions module_compile_options, PipelineCompileOptions pipeline_compile_options): + # # check if the payload values match between the module and pipeline compile options + # pipeline_payload_values = pipeline_compile_options.compile_options.numPayloadValues + # if module_compile_options.payload_types.size() > 0: + # for i in range(module_compile_options.compile_options.numPayloadTypes): + # if pipeline_payload_values != module_compile_options.compile_options.payloadTypes[ + # i].numPayloadValues: + # raise ValueError( + # f"number of payload values in module compile options at index {i} does not match the num_payload_values in the pipeline_compile_options.") + # return + + @classmethod + def create_as_task(cls, + DeviceContext context, + src, + ModuleCompileOptions module_compile_options = ModuleCompileOptions(), + PipelineCompileOptions pipeline_compile_options = PipelineCompileOptions(), + compile_flags=_nvrtc_compile_flags_default, + program_name=None): + """ + Create a module associated with a parallel task. + The function will perform just enough work to instantiate the module. + Everything else will be done by the task on request. + + Parameters + ---------- + context: DeviceContext + The current OptiX context + src: str + Either a string containing the module's source code or PTX or the path to a file containing it. + module_compile_options: ModuleCompileOptions + Compile options of this module + pipeline_compile_options: PipelineCompileOptions + Compile options of the pipeline the module will be used in + compile_flags: list[str], optional + List of compiler flags to use. If omitted, the default flags are used. + program_name: str, optional + The name the program is given internally. Of omitted either the filename is used if given or a default name is used. - cdef Task task = Task(module) + Returns + ------- + + module: Module + The created module + task: Task + The task associated with this module + + """ + cdef Module module = Module(context, None, compile_flags=compile_flags) + cdef const char * c_ptx + cdef unsigned int pipeline_payload_values, i + #cls._check_payload_values(module_compile_options, pipeline_compile_options) - optix_check_return(optixModuleCreateFromPTXWithTasks(context.c_context, - &module_compile_options.compile_options, - &pipeline_compile_options.compile_options, - c_ptx, - len(ptx) + 1, - NULL, - NULL, - &module.module, - &task.task)) - return module, task + ptx = module.compile_cuda_ptx(src, compile_flags, name=program_name) + c_ptx = ptx + + cdef Task task = Task(module) + + optix_check_return(optixModuleCreateFromPTXWithTasks(context.c_context, + &module_compile_options.compile_options, + &pipeline_compile_options.compile_options, + c_ptx, + len(ptx) + 1, + NULL, + NULL, + &module.module, + &task.task)) + return module, task @classmethod @@ -370,8 +370,7 @@ cdef class Module(OptixContextObject): """ cdef Module module = cls(context, None) - IF _OPTIX_VERSION > 70300: - cls._check_payload_values(module_compile_options, pipeline_compile_options) + #cls._check_payload_values(module_compile_options, pipeline_compile_options) optix_check_return(optixBuiltinISModuleGet(context.c_context, &module_compile_options.compile_options, &pipeline_compile_options.compile_options, @@ -379,26 +378,63 @@ cdef class Module(OptixContextObject): return module @staticmethod - def compile_cuda_ptx(src, compile_flags=_nvrtc_compile_flags_default, name=None, **kwargs): - if os.path.exists(src): - name = src + def get_default_nvrtc_compile_flags(std=None, rdc=False): + return get_default_nvrtc_compile_flags(std, rdc) + + + def compile_cuda_ptx(self, src, compile_flags=_nvrtc_compile_flags_default, name=None, **kwargs): + """ + Compiles a valid source module into the ptx format. Accepts files containing either source code, ptx, or + optix-ir code, compiles the source code if necessary and returns valid ptx or optix-ir modules. + + Parameters + ---------- + src: A string containing either the file name of the module to be compiled or the CUDA source code directly. + compile_flags: The flags used for the call to the nvptx compiler. If src is compiled already, this is ignored. + name: The name of the compiled module. If src is not an inline string, this is ignored and the file name is used + kwargs: Additional kwargs passed to the NVRTC compiler. See the _NVRTCProgram in the cupy package for details. + + Returns + ------- + ptx: A compiled ptx string or a string in optix-ir format. + """ + compiled = False + if os.path.isfile(src): + # if src is a file + name, ext = os.path.splitext(src) + if ext == '.ptx' or ext == '.optixir': + # if the file points to a compiled module (either in ptx or in optixir format) just return it's contents + compiled = True + # read the file contents with open(src, 'r') as f: src = f.read() - if _is_ptx(src): - return src + elif _is_ptx(src): + # if the source is in ptx format already (e.g. as an inline string) just return it + compiled = True - elif name is None: + if name is None: name = "default_program" - # TODO is there a public API for that? - from cupy.cuda.compiler import _NVRTCProgram as NVRTCProgram - prog = NVRTCProgram(src, name, **kwargs) - flags = list(compile_flags) - # get cuda and optix_include_paths - cuda_include_path = get_cuda_include_path() - optix_include_path = get_optix_include_path() - - flags.extend([f'-I{cuda_include_path}', f'-I{optix_include_path}']) - ptx, _ = prog.compile(flags) - return ptx + if not compiled: + # TODO is there a public API for that? + from cupy.cuda.compiler import _NVRTCProgram as NVRTCProgram + prog = NVRTCProgram(src, name, **kwargs) + flags = list(compile_flags) + # get cuda and optix_include_paths + cuda_include_path = get_cuda_include_path() + optix_include_path = get_local_optix_include_path() + if optix_include_path is None or not os.path.exists(optix_include_path): + # attempt to load the global path if the local path is not available + optix_include_path = get_optix_include_path() + if optix_include_path is None: + raise ValueError("Unable to locate the optix headers. Make sure that either the OPTIX_PATH environement variable is set" + "correctly or the optix headers are embedded into this package.") + if self.context.log_callback is not None: + # hook into the logging system for this output + self.context.log_callback(4, "build", f"Using optix include path: {optix_include_path}") + flags.extend([f'-I{cuda_include_path}', f'-I{optix_include_path}']) + ptx, _ = prog.compile(flags) + return ptx + else: + return src diff --git a/optix/optix_includes.h b/optix/optix_includes.h index 9750de6..c8589b5 100644 --- a/optix/optix_includes.h +++ b/optix/optix_includes.h @@ -1,5 +1,9 @@ #pragma once +#if defined(_MSC_VER) +#define NOMINMAX +#endif + #include #include #include @@ -13,4 +17,4 @@ inline void optix_check_return(OptixResult result) { ss << ": " << optixGetErrorString(result); throw std::runtime_error(ss.str()); } -} \ No newline at end of file +} diff --git a/optix/path_utility.py b/optix/path_utility.py index 756a756..32f9c20 100644 --- a/optix/path_utility.py +++ b/optix/path_utility.py @@ -25,6 +25,7 @@ import os from itertools import chain +import pathlib _cuda_path_cache = 'NOT_INITIALIZED' _optix_path_cache = 'NOT_INITIALIZED' @@ -38,7 +39,9 @@ def get_path(key): return tuple() -def search_on_path(filenames, keys=('PATH',)): +def search_on_path(filenames, keys=None): + if keys is None: + keys = ('PATH',) for p in chain(*[get_path(key) for key in keys]): for filename in filenames: full = os.path.abspath(os.path.join(p, filename)) @@ -47,7 +50,7 @@ def search_on_path(filenames, keys=('PATH',)): return None -def get_cuda_path(environment_variable='CUDA_ROOT'): +def get_cuda_path(environment_variable=None): global _cuda_path_cache # Use a magic word to represent the cache not filled because None is a @@ -55,7 +58,8 @@ def get_cuda_path(environment_variable='CUDA_ROOT'): if _cuda_path_cache != 'NOT_INITIALIZED': return _cuda_path_cache - nvcc_path = search_on_path(('nvcc', 'nvcc.exe'), keys=(environment_variable, 'PATH')) + nvcc_path = search_on_path(('nvcc', 'nvcc.exe'), keys=(environment_variable, 'PATH') if environment_variable is not + None else ('PATH',)) cuda_path_default = None if nvcc_path is not None: cuda_path_default = os.path.normpath( @@ -70,7 +74,7 @@ def get_cuda_path(environment_variable='CUDA_ROOT'): return _cuda_path_cache -def get_cuda_include_path(environment_variable='CUDA_ROOT'): +def get_cuda_include_path(environment_variable=None): cuda_path = get_cuda_path(environment_variable=environment_variable) if cuda_path is None: return None @@ -81,7 +85,7 @@ def get_cuda_include_path(environment_variable='CUDA_ROOT'): return None -def get_optix_path(environment_variable='OPTIX_PATH'): +def get_optix_path(path_hint=None, environment_variable=None): global _optix_path_cache # Use a magic word to represent the cache not filled because None is a @@ -89,14 +93,20 @@ def get_optix_path(environment_variable='OPTIX_PATH'): if _optix_path_cache != 'NOT_INITIALIZED': return _optix_path_cache - # prefer the dedicated environment variable - optix_header_path = search_on_path(('include/optix.h',), keys=(environment_variable,)) - if optix_header_path is None: - # search on the default path - optix_header_path = search_on_path(('../optix/include/optix.h',), keys=('PATH',)) + if path_hint is None: + # prefer the dedicated environment variable + optix_header_path = search_on_path(('include/optix.h',), keys=(environment_variable,) if environment_variable is not + None else None) + if optix_header_path is None: + # search on the default path + optix_header_path = search_on_path(('../optix/include/optix.h',), keys=('PATH', 'OPTIX_PATH')) - if optix_header_path is not None: - optix_header_path = os.path.normpath(os.path.join(os.path.dirname(optix_header_path), '..')) + if optix_header_path is not None: + optix_header_path = os.path.normpath(os.path.join(os.path.dirname(optix_header_path), '..')) + else: + optix_header_path = path_hint + if not os.path.exists(os.path.join(optix_header_path, "include/optix.h")): + raise ValueError(f"Path {optix_header_path} does not contain an optix installation.") if optix_header_path is not None: _optix_path_cache = optix_header_path @@ -106,7 +116,12 @@ def get_optix_path(environment_variable='OPTIX_PATH'): return _optix_path_cache -def get_optix_include_path(environment_variable='OPTIX_PATH'): +def get_local_optix_include_path(): + local_include_path = pathlib.Path(__file__).parent / "include" + return str(local_include_path) if local_include_path.exists() else None + + +def get_optix_include_path(environment_variable=None): optix_path = get_optix_path(environment_variable=environment_variable) if optix_path is None: return None @@ -115,3 +130,4 @@ def get_optix_include_path(environment_variable='OPTIX_PATH'): return optix_include_path else: return None + diff --git a/optix/pipeline.pxd b/optix/pipeline.pxd index 0806015..5e072a2 100644 --- a/optix/pipeline.pxd +++ b/optix/pipeline.pxd @@ -23,35 +23,22 @@ cdef extern from "optix_includes.h" nogil: OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS, OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING + cdef enum OptixPrimitiveTypeFlags: + OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, + OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM, + OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE, + OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE, - IF _OPTIX_VERSION > 70300: # switch to new instance flags - cdef enum OptixCompileDebugLevel: - OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - OPTIX_COMPILE_DEBUG_LEVEL_NONE, - OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, - OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, - OPTIX_COMPILE_DEBUG_LEVEL_FULL - - cdef enum OptixPrimitiveTypeFlags: - OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM, - OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE, - ELSE: - cdef enum OptixCompileDebugLevel: - OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - OPTIX_COMPILE_DEBUG_LEVEL_NONE, - OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, - OPTIX_COMPILE_DEBUG_LEVEL_FULL - - cdef enum OptixPrimitiveTypeFlags: - OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, - OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, - OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE, + + cdef enum OptixCompileDebugLevel: + OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, + OPTIX_COMPILE_DEBUG_LEVEL_NONE, + OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, + OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, + OPTIX_COMPILE_DEBUG_LEVEL_FULL cdef struct OptixPipelineCompileOptions: @@ -62,6 +49,7 @@ cdef extern from "optix_includes.h" nogil: unsigned int exceptionFlags const char * pipelineLaunchParamsVariableName unsigned int usesPrimitiveTypeFlags + int allowOpacityMicromaps cdef struct OptixPipelineLinkOptions: diff --git a/optix/pipeline.pyx b/optix/pipeline.pyx index c471b0d..3a30a93 100644 --- a/optix/pipeline.pyx +++ b/optix/pipeline.pyx @@ -15,6 +15,9 @@ from .shader_binding_table cimport ShaderBindingTable optix_init() +__all__ = ['CompileDebugLevel', 'ExceptionFlags', 'TraversableGraphFlags', 'PrimitiveTypeFlags', + 'PipelineCompileOptions', 'PipelineLinkOptions', 'Pipeline'] + class ExceptionFlags(IntFlag): """ @@ -36,48 +39,29 @@ class TraversableGraphFlags(IntFlag): ALLOW_SINGLE_LEVEL_INSTANCING = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_LEVEL_INSTANCING -IF _OPTIX_VERSION > 70300: # switch to new instance flags - class CompileDebugLevel(IntEnum): - """ - Wraps the OptixCompileDebugLevel enum. - """ - DEFAULT = OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - NONE = OPTIX_COMPILE_DEBUG_LEVEL_NONE, - MINIMAL = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, - MODERATE = OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, - FULL = OPTIX_COMPILE_DEBUG_LEVEL_FULL +class PrimitiveTypeFlags(IntFlag): + """ + Wraps the OptixPrimitiveTypeFlags enum. + """ + DEFAULT = 0, # corresponds to CUSTOM | TRIANGLE + CUSTOM = OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, + ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, + ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, + ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR + ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM + SPHERE = OPTIX_PRIMITIVE_TYPE_FLAGS_SPHERE + TRIANGLE = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE # fixes negative number error - class PrimitiveTypeFlags(IntFlag): - """ - Wraps the OptixPrimitiveTypeFlags enum. - """ - DEFAULT = 0, # corresponds to CUSTOM | TRIANGLE - CUSTOM = OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, - ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, - ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, - ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, - ROUND_CATMULLROM = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CATMULLROM, - TRIANGLE = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE # fixes negative number error -ELSE: - class CompileDebugLevel(IntEnum): - """ - Wraps the OptixCompileDebugLevel enum. - """ - DEFAULT = OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, - NONE = OPTIX_COMPILE_DEBUG_LEVEL_NONE, - LINEINFO = OPTIX_COMPILE_DEBUG_LEVEL_LINEINFO, - FULL = OPTIX_COMPILE_DEBUG_LEVEL_FULL - class PrimitiveTypeFlags(IntFlag): - """ - Wraps the OptixPrimitiveTypeFlags enum. - """ - DEFAULT = 0, # corresponds to CUSTOM | TRIANGLE - CUSTOM = OPTIX_PRIMITIVE_TYPE_FLAGS_CUSTOM, - ROUND_QUADRATIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_QUADRATIC_BSPLINE, - ROUND_CUBIC_BSPLINE = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_CUBIC_BSPLINE, - ROUND_LINEAR = OPTIX_PRIMITIVE_TYPE_FLAGS_ROUND_LINEAR, - TRIANGLE = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE +class CompileDebugLevel(IntEnum): + """ + Wraps the OptixCompileDebugLevel enum. + """ + DEFAULT = OPTIX_COMPILE_DEBUG_LEVEL_DEFAULT, + NONE = OPTIX_COMPILE_DEBUG_LEVEL_NONE, + MINIMAL = OPTIX_COMPILE_DEBUG_LEVEL_MINIMAL, + MODERATE = OPTIX_COMPILE_DEBUG_LEVEL_MODERATE, + FULL = OPTIX_COMPILE_DEBUG_LEVEL_FULL cdef class PipelineCompileOptions(OptixObject): @@ -93,7 +77,8 @@ cdef class PipelineCompileOptions(OptixObject): num_attribute_values = 0, exception_flags = ExceptionFlags.NONE, pipeline_launch_params_variable_name = "params", - uses_primitive_type_flags = PrimitiveTypeFlags.DEFAULT): + uses_primitive_type_flags = PrimitiveTypeFlags.DEFAULT, + allow_opacity_micromaps=False): self.uses_motion_blur = uses_motion_blur self.traversable_graph_flags = traversable_graph_flags self.num_payload_values = num_payload_values @@ -101,6 +86,7 @@ cdef class PipelineCompileOptions(OptixObject): self.exception_flags = exception_flags self.pipeline_launch_params_variable_name = pipeline_launch_params_variable_name self.uses_primitive_type_flags = uses_primitive_type_flags + self.allow_opacity_micromaps = allow_opacity_micromaps @property def uses_motion_blur(self): @@ -161,6 +147,14 @@ cdef class PipelineCompileOptions(OptixObject): def uses_primitive_type_flags(self, flags): self.compile_options.usesPrimitiveTypeFlags = flags.value + @property + def allow_opacity_micromaps(self): + return self.compile_options.allowOpacityMicromaps + + @allow_opacity_micromaps.setter + def allow_opacity_micromaps(self, allow): + self.compile_options.allowOpacityMicromaps = allow + @property def c_obj(self): return &self.compile_options diff --git a/optix/program_group.pyx b/optix/program_group.pyx index 2d95c24..09523fc 100644 --- a/optix/program_group.pyx +++ b/optix/program_group.pyx @@ -7,6 +7,8 @@ from libc.string cimport memset from enum import IntEnum optix_init() +__all__ = ['ProgramGroup', 'ProgramGroupKind'] + class ProgramGroupKind(IntEnum): """ Wraps the OptixProgramGroupKind enum diff --git a/optix/shader_binding_table.pyx b/optix/shader_binding_table.pyx index 4577339..8d6cf35 100644 --- a/optix/shader_binding_table.pyx +++ b/optix/shader_binding_table.pyx @@ -8,6 +8,9 @@ import numpy as np optix_init() +__all__ = ['ShaderBindingTable'] + + cdef class ShaderBindingTable(OptixObject): """ Represents a ShaderBindingTable, containing data used by the various programs in the OptiX Pipeline. diff --git a/optix/struct.pyx b/optix/struct.pyx index 7f1e53f..3c15488 100644 --- a/optix/struct.pyx +++ b/optix/struct.pyx @@ -10,6 +10,8 @@ from collections.abc import Mapping optix_init() +__all__ = ['SbtRecord', 'LaunchParamsRecord'] + def _aligned_itemsize( formats, alignment ): names = [] for i in range( len(formats ) ): @@ -18,8 +20,7 @@ def _aligned_itemsize( formats, alignment ): temp_dtype = np.dtype( { 'names' : names, 'formats' : formats, - 'align' : True - } ) + }, align=True) return round_up( temp_dtype.itemsize, alignment ) def array_to_device_memory(numpy_array, stream=None): @@ -175,9 +176,8 @@ cdef class _StructHelper: 'names': names, 'formats': formats, 'itemsize': itemsize, - 'align': True - }) - + }, align=True) + assert dtype.isalignedstruct return dtype def _prepare_array(self, array): diff --git a/pyproject.toml b/pyproject.toml index c5ff92b..20f0314 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,3 @@ [build-system] -requires = ["setuptools", "wheel", "Cython>=0.29.22,<3"] +requires = ["setuptools", "wheel", "Cython>=0.29.22,<3", "numpy"] build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/setup.py b/setup.py index 2658567..f53caf9 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,12 @@ from setuptools import setup, Extension, find_packages from Cython.Build import cythonize import re +import os from pathlib import Path +import shutil +import numpy +OPTIX_COMPATIBLE_VERSION = (7, 6) # standalone import of a module (https://stackoverflow.com/a/58423785) def import_module_from_path(path): @@ -26,10 +30,16 @@ def import_module_from_path(path): util = import_module_from_path('optix/path_utility.py') -cuda_include_path = util.get_cuda_include_path() -optix_include_path = util.get_optix_include_path() -if cuda_include_path is None or optix_include_path is None: - raise RuntimeError("Cuda or optix not found in the system") +cuda_include_path = util.get_cuda_include_path(environment_variable='CUDA_PATH') +optix_include_path = util.get_optix_include_path(environment_variable='OPTIX_PATH') +print("Found cuda includes at", cuda_include_path) +print("Found optix includes at", optix_include_path) +if cuda_include_path is None: + raise RuntimeError("CUDA not found in the system, but is required to build this package. Consider setting" + "CUDA_PATH to the location of the local cuda toolkit installation.") +if optix_include_path is None: + raise RuntimeError("OptiX not found in the system, but is required to build this package. Consider setting " + "OPTIX_PATH to the location of the optix SDK.") optix_version_re = re.compile(r'.*OPTIX_VERSION +(\d{5})') # get the optix version from the header with open(Path(optix_include_path) / "optix.h", 'r') as f: @@ -40,25 +50,87 @@ def import_module_from_path(path): optix_version_minor = (optix_version % 10000) // 100 optix_version_micro = optix_version % 100 -print(f"Found OptiX version {optix_version_major}.{optix_version_minor}.{optix_version_micro}.") +if (optix_version_major, optix_version_minor) != OPTIX_COMPATIBLE_VERSION: + raise ValueError(f"Found unsupported optix version {optix_version_major}.{optix_version_minor}.{optix_version_micro}. This package" + f"requires an optix version of {OPTIX_COMPATIBLE_VERSION[0]}.{OPTIX_COMPATIBLE_VERSION[1]}.x.") cython_compile_env = { - '_OPTIX_VERSION': optix_version, '_OPTIX_VERSION_MAJOR': optix_version_major, '_OPTIX_VERSION_MINOR': optix_version_minor, '_OPTIX_VERSION_MICRO': optix_version_micro } +libraries=[] +if os.name == 'nt': + # OptiX uses some Windows Registry API(e.g. RegCloseKey) + libraries.append('advapi32') + extensions = [Extension("*", ["optix/*.pyx"], - include_dirs=[cuda_include_path, optix_include_path])] + include_dirs=[cuda_include_path, optix_include_path, numpy.get_include()], libraries=libraries)] extensions = cythonize(extensions, language_level="3", - compile_time_env=cython_compile_env, build_dir="build") + compile_time_env=cython_compile_env, build_dir="build", annotate=True) with open("README.md", "r", encoding="utf-8") as fh: long_description = fh.read() version = import_module_from_path('optix/_version.py').__version__ +package_data = {} + + +def glob_fix(package_name, glob): + # this assumes setup.py lives in the folder that contains the package + package_path = Path(f'./{package_name}').resolve() + return [str(path.relative_to(package_path)) + for path in package_path.glob(glob)] + +from setuptools.command.install import install as _install +from setuptools.command.develop import develop as _develop + +class EmbeddHeadersCommandMixin: + def update_package_data(self): + self.distribution.package_data.update({ + 'optix': [*glob_fix('optix', 'include/**/*')] + }) + print("embedding optix headers into package data", + self.distribution.package_data) + + def run(self): + embedd = os.getenv("OPTIX_EMBED_HEADERS") + if embedd: + # create the path for the internal headers + # due to optix license restrictions those headers + # cannot be distributed on pypi directly so we will add this headers dynamically + # upon wheel construction to install them alongside the package + + if not os.path.exists('optix/include/optix.h'): + shutil.copytree(optix_include_path, 'optix/include') + + self.update_package_data() + + super().run() + + +class CustomInstallCommand(EmbeddHeadersCommandMixin, _install): + pass + + +class CustomDevelopCommand(EmbeddHeadersCommandMixin, _develop): + pass + + +cmd_classes = {'install': CustomInstallCommand, + 'develop': CustomDevelopCommand} + +try: + from wheel.bdist_wheel import bdist_wheel as _bdist_wheel + + class CustomBdistWheelCommand(EmbeddHeadersCommandMixin, _bdist_wheel): + pass + cmd_classes['bdist_wheel'] = CustomBdistWheelCommand +except ImportError: + CustomBdistWheel = None + setup( name="python-optix", version=version, @@ -81,8 +153,10 @@ def import_module_from_path(path): classifiers=[ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "License :: OSI Approved :: MIT License", "Operating System :: POSIX :: Linux", + "Operating System :: Microsoft :: Windows", "Environment :: GPU :: NVIDIA CUDA", "Development Status :: 4 - Beta", "Intended Audience :: Science/Research", @@ -94,5 +168,7 @@ def import_module_from_path(path): 'examples': ["pillow", "pyopengl", "pyglfw", "pyimgui"] }, python_requires=">=3.8", - zip_safe=False + package_data=package_data, + zip_safe=False, + cmdclass=cmd_classes )