OWL
Loading...
Searching...
No Matches
owl_device.h
1// ======================================================================== //
2// Copyright 2019-2020 Ingo Wald //
3// //
4// Licensed under the Apache License, Version 2.0 (the "License"); //
5// you may not use this file except in compliance with the License. //
6// You may obtain a copy of the License at //
7// //
8// http://www.apache.org/licenses/LICENSE-2.0 //
9// //
10// Unless required by applicable law or agreed to in writing, software //
11// distributed under the License is distributed on an "AS IS" BASIS, //
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. //
13// See the License for the specific language governing permissions and //
14// limitations under the License. //
15// ======================================================================== //
16
17#pragma once
18
19#include "owl/common/math/vec.h"
20#include "owl/common/math/box.h"
21#include "owl/common/math/AffineSpace.h"
22// the 'actual' optix
23#include <cuda.h>
24#include <optix.h>
25#include <vector_types.h>
26
27// ==================================================================
28// actual device-side "API" built-ins.
29// ==================================================================
30
31#ifndef __CUDACC__
32# error "this file should only ever get included on the device side"
33#endif
34
35namespace owl {
36
37 using namespace owl::common;
38
39 inline __device__ vec2i getLaunchIndex()
40 {
41 return (vec2i)optixGetLaunchIndex();
42 }
43
46 inline __device__ vec2i getLaunchDims()
47 {
48 return (vec2i)optixGetLaunchDimensions();
49 }
50
55 inline __device__ const void *getProgramDataPointer()
56 {
57 return (const void*)optixGetSbtDataPointer();
58 }
59
64 template<typename T>
65 inline __device__ const T &getProgramData()
66 {
67 return *(const T*)getProgramDataPointer();
68 }
69
70
71 // ==================================================================
72 // general convenience/helper functions - may move to samples
73 // ==================================================================
74 inline __device__ float linear_to_srgb(float x) {
75 if (x <= 0.0031308f) {
76 return 12.92f * x;
77 }
78 return 1.055f * pow(x, 1.f/2.4f) - 0.055f;
79 }
80
81 inline __device__ uint32_t make_8bit(const float f)
82 {
83 return min(255,max(0,int(f*256.f)));
84 }
85
86 inline __device__ uint32_t make_rgba(const vec3f color)
87 {
88 return
89 (make_8bit(color.x) << 0) +
90 (make_8bit(color.y) << 8) +
91 (make_8bit(color.z) << 16) +
92 (0xffU << 24);
93 }
94 inline __device__ uint32_t make_rgba(const vec4f color)
95 {
96 return
97 (make_8bit(color.x) << 0) +
98 (make_8bit(color.y) << 8) +
99 (make_8bit(color.z) << 16) +
100 (make_8bit(color.w) << 24);
101 }
102
103 inline __device__ void initializeTransformToIdentity(float (&t)[12]) {
104 t[0] = 1.f; t[1] = 0.f; t[2] = 0.f; t[3] = 0.f;
105 t[4] = 0.f; t[5] = 1.f; t[6] = 0.f; t[7] = 0.f;
106 t[8] = 0.f; t[9] = 0.f; t[10] = 1.f; t[11] = 0.f;
107 }
108
109 inline __device__ void toRowMajor(affine3f tfm, float (&t)[12]) {
110 t[0] = tfm.l.vx.x; t[1] = tfm.l.vx.y; t[2] = tfm.l.vx.z; t[3] = tfm.p.x;
111 t[4] = tfm.l.vy.x; t[5] = tfm.l.vy.y; t[6] = tfm.l.vy.z; t[7] = tfm.p.y;
112 t[8] = tfm.l.vz.x; t[9] = tfm.l.vz.y; t[10] = tfm.l.vz.z; t[11] = tfm.p.z;
113 }
114
115 static __forceinline__ __device__ void* unpackPointer( uint32_t i0, uint32_t i1 )
116 {
117 const uint64_t uptr = static_cast<uint64_t>( i0 ) << 32 | i1;
118 void* ptr = reinterpret_cast<void*>( uptr );
119 return ptr;
120 }
121
122
123 static __forceinline__ __device__ void packPointer( void* ptr, uint32_t& i0, uint32_t& i1 )
124 {
125 const uint64_t uptr = reinterpret_cast<uint64_t>( ptr );
126 i0 = uptr >> 32;
127 i1 = uptr & 0x00000000ffffffff;
128 }
129
130
131 static __forceinline__ __device__ void *getPRDPointer()
132 {
133 const uint32_t u0 = optixGetPayload_0();
134 const uint32_t u1 = optixGetPayload_1();
135 return unpackPointer(u0, u1);
136 }
137
138 template<typename T>
139 static __forceinline__ __device__ T &getPRD()
140 { return *(T*)getPRDPointer(); }
141
142 template<int _rayType=0, int _numRayTypes=1, bool _disablePerGeometrySBTRecords=0>
143 struct RayT {
144 enum { rayType = _rayType };
145 enum { numRayTypes = _numRayTypes };
146 enum { disablePerGeometrySBTRecords = _disablePerGeometrySBTRecords };
147 inline __device__ RayT() {}
148 inline __device__ RayT(const vec3f &origin,
149 const vec3f &direction,
150 float tmin,
151 float tmax,
152 OptixVisibilityMask visibilityMask=(OptixVisibilityMask)(-1))
153 : origin(origin),
154 direction(direction),
155 tmin(tmin),
156 tmax(tmax),
157 visibilityMask(visibilityMask)
158 {}
159
160 vec3f origin, direction;
161 float tmin=0.f,tmax=1e30f,time=0.f;
162 OptixVisibilityMask visibilityMask=(OptixVisibilityMask)-1;
163 };
164 typedef RayT<0,1> Ray;
165
166
167 template<typename RayType, typename PRD>
168 inline __device__
169 void traceRay(OptixTraversableHandle traversable,
170 const RayType &ray,
171 PRD &prd,
172 uint32_t rayFlags = 0u)
173 {
174 unsigned int p0 = 0;
175 unsigned int p1 = 0;
176 owl::packPointer(&prd,p0,p1);
177
178 optixTrace(traversable,
179 (const float3&)ray.origin,
180 (const float3&)ray.direction,
181 ray.tmin,
182 ray.tmax,
183 ray.time,
184 ray.visibilityMask,
185 /*rayFlags */ rayFlags,
186 /*SBToffset */ ray.rayType,
187 /*SBTstride */ ray.numRayTypes * (ray.disablePerGeometrySBTRecords) ? 0 : 1,
188 /*missSBTIndex */ ray.rayType,
189 p0,
190 p1);
191 }
192
193 template<typename PRD>
194 inline __device__
195 void trace(OptixTraversableHandle traversable,
196 const Ray &ray,
197 int numRayTypes,
198 PRD &prd,
199 int sbtOffset = 0)
200 {
201 unsigned int p0 = 0;
202 unsigned int p1 = 0;
203 owl::packPointer(&prd,p0,p1);
204
205 optixTrace(traversable,
206 (const float3&)ray.origin,
207 (const float3&)ray.direction,
208 ray.tmin,
209 ray.tmax,
210 ray.time,
211 ray.visibilityMask,
212 /*rayFlags */0u,
213 /*SBToffset */ray.rayType + numRayTypes*sbtOffset,
214 /*SBTstride */numRayTypes,
215 /*missSBTIndex */ray.rayType,
216 p0,
217 p1);
218 }
219
220 template<typename PRD>
221 inline __device__
222 void trace(OptixTraversableHandle traversable,
223 const Ray &ray,
224 int numRayTypes,
225 bool disablePerGeometrySBTRecords,
226 PRD &prd,
227 int sbtOffset = 0)
228 {
229 unsigned int p0 = 0;
230 unsigned int p1 = 0;
231 owl::packPointer(&prd,p0,p1);
232
233 optixTrace(traversable,
234 (const float3&)ray.origin,
235 (const float3&)ray.direction,
236 ray.tmin,
237 ray.tmax,
238 ray.time,
239 ray.visibilityMask,
240 /*rayFlags */0u,
241 /*SBToffset */ray.rayType + numRayTypes*sbtOffset,
242 /*SBTstride */numRayTypes * (ray.disablePerGeometrySBTRecords) ? 0 : 1,
243 /*missSBTIndex */ray.rayType,
244 p0,
245 p1);
246 }
247
248} // ::owl
249
250#define OPTIX_RAYGEN_PROGRAM(programName) \
251 extern "C" __global__ \
252 void __raygen__##programName
253
254#define OPTIX_CLOSEST_HIT_PROGRAM(programName) \
255 extern "C" __global__ \
256 void __closesthit__##programName
257
258#define OPTIX_ANY_HIT_PROGRAM(programName) \
259 extern "C" __global__ \
260 void __anyhit__##programName
261
262#define OPTIX_INTERSECT_PROGRAM(programName) \
263 extern "C" __global__ \
264 void __intersection__##programName
265
266#define OPTIX_MISS_PROGRAM(programName) \
267 extern "C" __global__ \
268 void __miss__##programName
269
270/* defines the wrapper stuff to actually launch all the bounds
271 programs from the host - todo: move to deviceAPI.h once working */
272#ifndef OPTIX_BOUNDS_PROGRAM
273#define OPTIX_BOUNDS_PROGRAM(progName) \
274 /* fwd decl for the kernel func to call */ \
275 inline __device__ \
276 void __boundsFunc__##progName(const void *geomData, \
277 owl::common::box3f &bounds, \
278 const int32_t primID); \
279 \
280 /* the '__global__' kernel we can get a function handle on */ \
281 extern "C" __global__ \
282 void __boundsFuncKernel__##progName(const void *geomData, \
283 owl::common::box3f *const boundsArray, \
284 const uint32_t numPrims) \
285 { \
286 uint32_t blockIndex \
287 = blockIdx.x \
288 + blockIdx.y * gridDim.x \
289 + blockIdx.z * gridDim.x * gridDim.y; \
290 uint32_t primID \
291 = threadIdx.x + blockDim.x*threadIdx.y \
292 + blockDim.x*blockDim.y*blockIndex; \
293 if (primID < numPrims) { \
294 __boundsFunc__##progName(geomData,boundsArray[primID],primID); \
295 } \
296 } \
297 \
298 /* now the actual device code that the user is writing: */ \
299 inline __device__ void __boundsFunc__##progName \
300 /* program args and body supplied by user ... */
301#endif
302
303/* defines the wrapper stuff to actually launch all the bounds
304 programs from the host - todo: move to deviceAPI.h once working */
305#ifndef OPTIX_MOTION_BOUNDS_PROGRAM
306#define OPTIX_MOTION_BOUNDS_PROGRAM(progName) \
307 /* fwd decl for the kernel func to call */ \
308 inline __device__ \
309 void __motionBoundsFunc__##progName(const void *geomData, \
310 owl::common::box3f &boundskey1, \
311 owl::common::box3f &boundskey2, \
312 const int32_t primID); \
313 \
314 /* the '__global__' kernel we can get a function handle on */ \
315 extern "C" __global__ \
316 void __motionBoundsFuncKernel__##progName(const void *geomData, \
317 owl::common::box3f *const boundsArrayKey1, \
318 owl::common::box3f *const boundsArrayKey2, \
319 const uint32_t numPrims) \
320 { \
321 uint32_t blockIndex \
322 = blockIdx.x \
323 + blockIdx.y * gridDim.x \
324 + blockIdx.z * gridDim.x * gridDim.y; \
325 uint32_t primID \
326 = threadIdx.x + blockDim.x*threadIdx.y \
327 + blockDim.x*blockDim.y*blockIndex; \
328 if (primID < numPrims) { \
329 __motionBoundsFunc__##progName(geomData, \
330 boundsArrayKey1[primID], \
331 boundsArrayKey2[primID], \
332 primID); \
333 } \
334 } \
335 \
336 /* now the actual device code that the user is writing: */ \
337 inline __device__ void __motionBoundsFunc__##progName \
338 /* program args and body supplied by user ... */
339#endif
340
341/* defines a wrapper to a new program type which enables GPU-side
342 instance manipulation (transforms, visibility mask, etc). Assumes
343 that geometry contribution to hitgroup index is disabled. */
344#ifndef OPTIX_INSTANCE_PROGRAM
345#define OPTIX_INSTANCE_PROGRAM(progName) \
346 /* fwd decl for the kernel func to call */ \
347 inline __device__ \
348 void __instanceFunc__##progName( \
349 const int32_t instanceIndex, OptixInstance &instance); \
350 \
351 /* the '__global__' kernel we can get a function handle on */ \
352 extern "C" __global__ \
353 void __instanceFuncKernel__##progName( \
354 OptixInstance *insts, uint32_t numInsts, uint32_t numRayTypes) \
355 { \
356 uint32_t blockIndex \
357 = blockIdx.x \
358 + blockIdx.y * gridDim.x \
359 + blockIdx.z * gridDim.x * gridDim.y; \
360 uint32_t instanceIndex \
361 = threadIdx.x + blockDim.x*threadIdx.y \
362 + blockDim.x*blockDim.y*blockIndex; \
363 if (instanceIndex < numInsts) { \
364 OptixInstance oi = {}; \
365 /* defaults */ \
366 oi.flags = OPTIX_INSTANCE_FLAG_NONE; \
367 oi.instanceId = instanceIndex; \
368 oi.visibilityMask = 255; \
369 oi.traversableHandle = 0; /* if not set, ignored by builder */ \
370 initializeTransformToIdentity(oi.transform); \
371 __instanceFunc__##progName(instanceIndex, oi); \
372 insts[instanceIndex] = oi; \
373 } \
374 } \
375 \
376 /* now the actual device code that the user is writing: */ \
377 inline __device__ void __instanceFunc__##progName \
378 /* program args and body supplied by user ... */
379#endif
Definition: owl_device.h:143
Definition: AffineSpace.h:52