Merge branch 'master' into blender2.8

This commit is contained in:
Campbell Barton 2018-11-22 15:16:45 +11:00
commit 4b9d242be5
9 changed files with 247 additions and 283 deletions

@ -71,28 +71,23 @@ ccl_device_inline bool triangle_intersect(KernelGlobals *kg,
} }
#ifdef __KERNEL_AVX2__ #ifdef __KERNEL_AVX2__
#define cross256(A,B, C,D) _mm256_fmsub_ps(A,B, _mm256_mul_ps(C,D)) #define cross256(A,B, C,D) _mm256_fmsub_ps(A,B, _mm256_mul_ps(C,D))
#if defined(__KERNEL_CUDA__) && __CUDA_ARCH__ < 300 ccl_device_inline int ray_triangle_intersect8(
ccl_device_inline KernelGlobals *kg,
#else float3 ray_P,
ccl_device_forceinline float3 ray_dir,
#endif Intersection **isect,
int ray_triangle_intersect8(KernelGlobals *kg, uint visibility,
float3 ray_P, int object,
float3 ray_dir, __m256 *triA,
Intersection **isect, __m256 *triB,
uint visibility, __m256 *triC,
int object, int prim_addr,
__m256 *triA, int prim_num,
__m256 *triB, uint *num_hits,
__m256 *triC, uint max_hits,
int prim_addr, int *num_hits_in_instance,
int prim_num, float isect_t)
uint *num_hits,
uint max_hits,
int *num_hits_in_instance,
float isec_t)
{ {
const unsigned char prim_num_mask = (1 << prim_num) - 1; const unsigned char prim_num_mask = (1 << prim_num) - 1;
@ -108,10 +103,6 @@ int ray_triangle_intersect8(KernelGlobals *kg,
const __m256 dirz256 = _mm256_set1_ps(ray_dir.z); const __m256 dirz256 = _mm256_set1_ps(ray_dir.z);
/* Calculate vertices relative to ray origin. */ /* Calculate vertices relative to ray origin. */
/* const float3 v0 = tri_c - P;
const float3 v1 = tri_a - P;
const float3 v2 = tri_b - P; */
__m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256); __m256 v0_x_256 = _mm256_sub_ps(triC[0], Px256);
__m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256); __m256 v0_y_256 = _mm256_sub_ps(triC[1], Py256);
__m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256); __m256 v0_z_256 = _mm256_sub_ps(triC[2], Pz256);
@ -136,11 +127,7 @@ int ray_triangle_intersect8(KernelGlobals *kg,
__m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256); __m256 v1_v2_y_256 = _mm256_add_ps(v1_y_256, v2_y_256);
__m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256); __m256 v1_v2_z_256 = _mm256_add_ps(v1_z_256, v2_z_256);
/* Calculate triangle edges. /* Calculate triangle edges. */
const float3 e0 = v2 - v0;
const float3 e1 = v0 - v1;
const float3 e2 = v1 - v2;*/
__m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256); __m256 e0_x_256 = _mm256_sub_ps(v2_x_256, v0_x_256);
__m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256); __m256 e0_y_256 = _mm256_sub_ps(v2_y_256, v0_y_256);
__m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256); __m256 e0_z_256 = _mm256_sub_ps(v2_z_256, v0_z_256);
@ -153,48 +140,32 @@ int ray_triangle_intersect8(KernelGlobals *kg,
__m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256); __m256 e2_y_256 = _mm256_sub_ps(v1_y_256, v2_y_256);
__m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256); __m256 e2_z_256 = _mm256_sub_ps(v1_z_256, v2_z_256);
/* Perform edge tests. /* Perform edge tests. */
const float U = dot(cross(v2 + v0, e0), ray_dir); /* cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx) */
const float V = dot(cross(v0 + v1, e1), ray_dir);
const float W = dot(cross(v1 + v2, e2), ray_dir);*/
//cross (AyBz - AzBy, AzBx -AxBz, AxBy - AyBx)
__m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256); __m256 U_x_256 = cross256(v0_v2_y_256, e0_z_256, v0_v2_z_256, e0_y_256);
__m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256); __m256 U_y_256 = cross256(v0_v2_z_256, e0_x_256, v0_v2_x_256, e0_z_256);
__m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256); __m256 U_z_256 = cross256(v0_v2_x_256, e0_y_256, v0_v2_y_256, e0_x_256);
//vertical dot /* vertical dot */
__m256 U_256 = _mm256_mul_ps(U_x_256, dirx256); __m256 U_256 = _mm256_mul_ps(U_x_256, dirx256);
U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256); //_mm256_add_ps(U_256, _mm256_mul_ps(U_y_256, diry256)); U_256 = _mm256_fmadd_ps(U_y_256, diry256, U_256);
U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256); //_mm256_add_ps(U_256, _mm256_mul_ps(U_z_256, dirz256)); U_256 = _mm256_fmadd_ps(U_z_256, dirz256, U_256);
__m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256); __m256 V_x_256 = cross256(v0_v1_y_256, e1_z_256, v0_v1_z_256, e1_y_256);
__m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256); __m256 V_y_256 = cross256(v0_v1_z_256, e1_x_256, v0_v1_x_256, e1_z_256);
__m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256); __m256 V_z_256 = cross256(v0_v1_x_256, e1_y_256, v0_v1_y_256, e1_x_256);
//vertical dot /* vertical dot */
__m256 V_256 = _mm256_mul_ps(V_x_256, dirx256); __m256 V_256 = _mm256_mul_ps(V_x_256, dirx256);
V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);// _mm256_add_ps(V_256, _mm256_mul_ps(V_y_256, diry256)); V_256 = _mm256_fmadd_ps(V_y_256, diry256, V_256);
V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);// _mm256_add_ps(V_256, _mm256_mul_ps(V_z_256, dirz256)); V_256 = _mm256_fmadd_ps(V_z_256, dirz256, V_256);
__m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256); __m256 W_x_256 = cross256(v1_v2_y_256, e2_z_256, v1_v2_z_256, e2_y_256);
__m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256); __m256 W_y_256 = cross256(v1_v2_z_256, e2_x_256, v1_v2_x_256, e2_z_256);
__m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256); __m256 W_z_256 = cross256(v1_v2_x_256, e2_y_256, v1_v2_y_256, e2_x_256);
//vertical dot /* vertical dot */
__m256 W_256 = _mm256_mul_ps(W_x_256, dirx256); __m256 W_256 = _mm256_mul_ps(W_x_256, dirx256);
W_256 = _mm256_fmadd_ps(W_y_256, diry256,W_256);//_mm256_add_ps(W_256, _mm256_mul_ps(W_y_256, diry256)); W_256 = _mm256_fmadd_ps(W_y_256, diry256,W_256);
W_256 = _mm256_fmadd_ps(W_z_256, dirz256,W_256);//_mm256_add_ps(W_256, _mm256_mul_ps(W_z_256, dirz256)); W_256 = _mm256_fmadd_ps(W_z_256, dirz256,W_256);
//const float minUVW = min(U, min(V, W));
//const float maxUVW = max(U, max(V, W));
#if 0
__m256 minUVW_256 = _mm256_min_ps(U_256, _mm256_min_ps(V_256, W_256));
__m256 maxUVW_256 = _mm256_max_ps(U_256, _mm256_max_ps(V_256, W_256));
//if(minUVW < 0.0f && maxUVW > 0.0f)
__m256i mask_minmaxUVW_256 = _mm256_and_si256(
_mm256_cmpgt_epi32(zero256, _mm256_castps_si256(minUVW_256)),
//_mm256_castps_si256(minUVW_256),
_mm256_cmpgt_epi32(_mm256_castps_si256(maxUVW_256), zero256));
#else
__m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31); __m256i U_256_1 = _mm256_srli_epi32(_mm256_castps_si256(U_256), 31);
__m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31); __m256i V_256_1 = _mm256_srli_epi32(_mm256_castps_si256(V_256), 31);
__m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31); __m256i W_256_1 = _mm256_srli_epi32(_mm256_castps_si256(W_256), 31);
@ -204,9 +175,8 @@ int ray_triangle_intersect8(KernelGlobals *kg,
const __m256i two256 = _mm256_set1_epi32(2); const __m256i two256 = _mm256_set1_epi32(2);
__m256i mask_minmaxUVW_256 = _mm256_or_si256( __m256i mask_minmaxUVW_256 = _mm256_or_si256(
_mm256_cmpeq_epi32(one256, UVW_256_1), _mm256_cmpeq_epi32(one256, UVW_256_1),
_mm256_cmpeq_epi32(two256, UVW_256_1) ); _mm256_cmpeq_epi32(two256, UVW_256_1));
#endif
unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256)); unsigned char mask_minmaxUVW_pos = _mm256_movemask_ps(_mm256_castsi256_ps(mask_minmaxUVW_256));
if((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set if((mask_minmaxUVW_pos & prim_num_mask) == prim_num_mask) { //all bits set
@ -214,231 +184,187 @@ int ray_triangle_intersect8(KernelGlobals *kg,
} }
/* Calculate geometry normal and denominator. */ /* Calculate geometry normal and denominator. */
// const float3 Ng1 = cross(e1, e0);
//const Vec3vfM Ng1 = stable_triangle_normal(e2,e1,e0);
__m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256); __m256 Ng1_x_256 = cross256(e1_y_256, e0_z_256, e1_z_256, e0_y_256);
__m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256); __m256 Ng1_y_256 = cross256(e1_z_256, e0_x_256, e1_x_256, e0_z_256);
__m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256); __m256 Ng1_z_256 = cross256(e1_x_256, e0_y_256, e1_y_256, e0_x_256);
//const float3 Ng = Ng1 + Ng1;
Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256); Ng1_x_256 = _mm256_add_ps(Ng1_x_256, Ng1_x_256);
Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256); Ng1_y_256 = _mm256_add_ps(Ng1_y_256, Ng1_y_256);
Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256); Ng1_z_256 = _mm256_add_ps(Ng1_z_256, Ng1_z_256);
//const float den = dot3(Ng, dir); /* vertical dot */
//vertical dot
__m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256); __m256 den_256 = _mm256_mul_ps(Ng1_x_256, dirx256);
den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256,den_256);//_mm256_add_ps(den_256, _mm256_mul_ps(Ng1_y_256, diry256)); den_256 = _mm256_fmadd_ps(Ng1_y_256, diry256,den_256);
den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256,den_256);//_mm256_add_ps(den_256, _mm256_mul_ps(Ng1_z_256, dirz256)); den_256 = _mm256_fmadd_ps(Ng1_z_256, dirz256,den_256);
// __m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
/* Perform depth test. */ /* Perform depth test. */
//const float T = dot3(v0, Ng);
__m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256); __m256 T_256 = _mm256_mul_ps(Ng1_x_256, v0_x_256);
T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256,T_256);//_mm256_add_ps(T_256, _mm256_mul_ps(Ng1_y_256, v0_y_256)); T_256 = _mm256_fmadd_ps(Ng1_y_256, v0_y_256,T_256);
T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256,T_256);//_mm256_add_ps(T_256, _mm256_mul_ps(Ng1_z_256, v0_z_256)); T_256 = _mm256_fmadd_ps(Ng1_z_256, v0_z_256,T_256);
//const int sign_den = (__float_as_int(den) & 0x80000000);
const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000); const __m256i c0x80000000 = _mm256_set1_epi32(0x80000000);
__m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000); __m256i sign_den_256 = _mm256_and_si256(_mm256_castps_si256(den_256), c0x80000000);
//const float sign_T = xor_signmask(T, sign_den);
__m256 sign_T_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256)); __m256 sign_T_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(T_256), sign_den_256));
/*if((sign_T < 0.0f) || mask_minmaxUVW_pos { return false;} */
unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256); unsigned char mask_sign_T = _mm256_movemask_ps(sign_T_256);
if(((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) { if(((mask_minmaxUVW_pos | mask_sign_T) & prim_num_mask) == prim_num_mask) {
return false; return false;
} /**/ }
__m256 xor_signmask_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)); __m256 xor_signmask_256 = _mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256));
ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8]; ccl_align(32) float den8[8], U8[8], V8[8], T8[8], sign_T8[8], xor_signmask8[8];
ccl_align(32) unsigned int mask_minmaxUVW8[8]; ccl_align(32) unsigned int mask_minmaxUVW8[8];
if(visibility == PATH_RAY_SHADOW_OPAQUE){ if(visibility == PATH_RAY_SHADOW_OPAQUE) {
__m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);//~mask_minmaxUVW_256 __m256i mask_final_256 = _mm256_cmpeq_epi32(mask_minmaxUVW_256, zero256);
__m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256);
__m256i maskden256 = _mm256_cmpeq_epi32(_mm256_castps_si256(den_256), zero256); __m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256));
__m256 rayt_256 = _mm256_set1_ps((*isect)->t);
__m256i mask0 = _mm256_cmpgt_epi32(zero256, _mm256_castps_si256(sign_T_256)); __m256i mask1 = _mm256_cmpgt_epi32(_mm256_castps_si256(sign_T_256),
__m256 rayt_256 = _mm256_set1_ps((*isect)->t); _mm256_castps_si256(
_mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256)
__m256i mask1 = _mm256_cmpgt_epi32(_mm256_castps_si256(sign_T_256), )
_mm256_castps_si256( );
_mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256) mask0 = _mm256_or_si256(mask1, mask0);
) mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask)
); mask_final_256 = _mm256_andnot_si256(maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
/* __m256i mask1 = _mm256_castps_si256(_mm256_cmp_ps(sign_T_256, unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
_mm256_mul_ps(_mm256_castsi256_ps(_mm256_xor_si256(_mm256_castps_si256(den_256), sign_den_256)), rayt_256), if((mask_final & prim_num_mask) == 0) {
_CMP_GT_OS return false;
) );*/
mask0 = _mm256_or_si256(mask1, mask0);
//unsigned char mask = _mm256_movemask_ps(_mm256_castsi256_ps(mask0));
//unsigned char maskden = _mm256_movemask_ps(_mm256_castsi256_ps(maskden256));
//unsigned char mask_final = ((~mask) & (~maskden) & (~mask_minmaxUVW_pos));
mask_final_256 = _mm256_andnot_si256(mask0, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask)
mask_final_256 = _mm256_andnot_si256(maskden256, mask_final_256); //(~mask_minmaxUVW_pos) &(~mask) & (~maskden)
unsigned char mask_final = _mm256_movemask_ps(_mm256_castsi256_ps(mask_final_256));
if((mask_final & prim_num_mask) == 0) { //all bits NOT set
return false;
} /**/
unsigned long i = 0;
#if defined(_MSC_VER)
unsigned char res = _BitScanForward(&i, (unsigned long)mask_final);
#else
i = __builtin_ffs(mask_final)-1;
#endif
den_256 = _mm256_rcp_ps(den_256); //inv_den
U_256 = _mm256_mul_ps(U_256, den_256); //*inv_den
V_256 = _mm256_mul_ps(V_256, den_256); //*inv_den
T_256 = _mm256_mul_ps(T_256, den_256); //*inv_den
_mm256_store_ps(U8, U_256);
_mm256_store_ps(V8, V_256);
_mm256_store_ps(T8, T_256);
//here we assume (kernel_tex_fetch(__prim_visibility, (prim_addr +i)) & visibility) is always true
(*isect)->u = U8[i];
(*isect)->v = V8[i];
(*isect)->t = T8[i];
(*isect)->prim = (prim_addr + i);
(*isect)->object = object;
(*isect)->type = PRIMITIVE_TRIANGLE;
return true;
} }
const int i = __bsf(mask_final);
__m256 inv_den_256 = _mm256_rcp_ps(den_256);
U_256 = _mm256_mul_ps(U_256, inv_den_256);
V_256 = _mm256_mul_ps(V_256, inv_den_256);
T_256 = _mm256_mul_ps(T_256, inv_den_256);
_mm256_store_ps(U8, U_256);
_mm256_store_ps(V8, V_256);
_mm256_store_ps(T8, T_256);
/* NOTE: Here we assume visibility for all triangles in the node is
* the same. */
(*isect)->u = U8[i];
(*isect)->v = V8[i];
(*isect)->t = T8[i];
(*isect)->prim = (prim_addr + i);
(*isect)->object = object;
(*isect)->type = PRIMITIVE_TRIANGLE;
return true;
}
else { else {
_mm256_store_ps(den8, den_256); _mm256_store_ps(den8, den_256);
_mm256_store_ps(U8, U_256); _mm256_store_ps(U8, U_256);
_mm256_store_ps(V8, V_256); _mm256_store_ps(V8, V_256);
_mm256_store_ps(T8, T_256); _mm256_store_ps(T8, T_256);
_mm256_store_ps(sign_T8, sign_T_256); _mm256_store_ps(sign_T8, sign_T_256);
_mm256_store_ps(xor_signmask8, xor_signmask_256); _mm256_store_ps(xor_signmask8, xor_signmask_256);
_mm256_store_si256((__m256i*)mask_minmaxUVW8, mask_minmaxUVW_256); _mm256_store_si256((__m256i*)mask_minmaxUVW8, mask_minmaxUVW_256);
int ret = false; int ret = false;
if(visibility == PATH_RAY_SHADOW) { if(visibility == PATH_RAY_SHADOW) {
for(int i = 0; i < prim_num; i++) {
if(!mask_minmaxUVW8[i]) {
#ifdef __VISIBILITY_FLAG__
if(kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility)
#endif
{
if((sign_T8[i] >= 0.0f) &&
(sign_T8[i] <= (*isect)->t * xor_signmask8[i]))
{
if(den8[i]) {
const float inv_den = 1.0f / den8[i];
(*isect)->u = U8[i] * inv_den;
(*isect)->v = V8[i] * inv_den;
(*isect)->t = T8[i] * inv_den;
(*isect)->prim = (prim_addr + i);
(*isect)->object = object;
(*isect)->type = PRIMITIVE_TRIANGLE;
int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
int shader = 0;
#ifdef __HAIR__
if(kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
#endif
{
shader = kernel_tex_fetch(__tri_shader, prim);
}
#ifdef __HAIR__
else {
float4 str = kernel_tex_fetch(__curves, prim);
shader = __float_as_int(str.z);
}
#endif
int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
/* if no transparent shadows, all light is blocked */
if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
return 2;
}
/* if maximum number of hits reached, block all light */
else if(*num_hits == max_hits) {
return 2;
}
/* move on to next entry in intersections array */
ret = true;
(*isect)++;
(*num_hits)++;
(*num_hits_in_instance)++;
(*isect)->t = isec_t;
} //den
} //if sign
} //vis
}//if mask
} //for
}
else { //default case
for(int i = 0; i < prim_num; i++) { for(int i = 0; i < prim_num; i++) {
if(!mask_minmaxUVW8[i]) { if(mask_minmaxUVW8[i]) {
continue;
}
#ifdef __VISIBILITY_FLAG__ #ifdef __VISIBILITY_FLAG__
if(kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
continue;
}
#endif #endif
{ if((sign_T8[i] < 0.0f) ||
if((sign_T8[i] >= 0.0f) && (sign_T8[i] > (*isect)->t * xor_signmask8[i]))
(sign_T8[i] <= (*isect)->t * xor_signmask8[i])) {
{ continue;
if(den8[i]) { }
const float inv_den = 1.0f / den8[i]; if(!den8[i]) {
continue;
(*isect)->u = U8[i] * inv_den; }
(*isect)->v = V8[i] * inv_den; const float inv_den = 1.0f / den8[i];
(*isect)->t = T8[i] * inv_den; (*isect)->u = U8[i] * inv_den;
(*isect)->v = V8[i] * inv_den;
(*isect)->prim = (prim_addr + i); (*isect)->t = T8[i] * inv_den;
(*isect)->object = object; (*isect)->prim = (prim_addr + i);
(*isect)->type = PRIMITIVE_TRIANGLE; (*isect)->object = object;
(*isect)->type = PRIMITIVE_TRIANGLE;
ret = true; const int prim = kernel_tex_fetch(__prim_index, (*isect)->prim);
} //den int shader = 0;
} //if sign #ifdef __HAIR__
} //vis if(kernel_tex_fetch(__prim_type, (*isect)->prim) & PRIMITIVE_ALL_TRIANGLE)
}//if mask #endif
} //for {
} //default shader = kernel_tex_fetch(__tri_shader, prim);
return ret; }
}// else PATH_RAY_SHADOW_OPAQUE #ifdef __HAIR__
else {
float4 str = kernel_tex_fetch(__curves, prim);
shader = __float_as_int(str.z);
}
#endif
const int flag = kernel_tex_fetch(__shaders, (shader & SHADER_MASK)).flags;
/* If no transparent shadows, all light is blocked. */
if(!(flag & SD_HAS_TRANSPARENT_SHADOW)) {
return 2;
}
/* If maximum number of hits reached, block all light. */
else if(num_hits == NULL || *num_hits == max_hits) {
return 2;
}
/* Move on to next entry in intersections array. */
ret = true;
(*isect)++;
(*num_hits)++;
(*num_hits_in_instance)++;
(*isect)->t = isect_t;
}
}
else {
for(int i = 0; i < prim_num; i++) {
if(mask_minmaxUVW8[i]) {
continue;
}
#ifdef __VISIBILITY_FLAG__
if((kernel_tex_fetch(__prim_visibility, (prim_addr + i)) & visibility) == 0) {
continue;
}
#endif
if((sign_T8[i] < 0.0f) ||
(sign_T8[i] > (*isect)->t * xor_signmask8[i]))
{
continue;
}
if(!den8[i]) {
continue;
}
const float inv_den = 1.0f / den8[i];
(*isect)->u = U8[i] * inv_den;
(*isect)->v = V8[i] * inv_den;
(*isect)->t = T8[i] * inv_den;
(*isect)->prim = (prim_addr + i);
(*isect)->object = object;
(*isect)->type = PRIMITIVE_TRIANGLE;
ret = true;
}
}
return ret;
}
} }
//vz static ccl_device_inline int triangle_intersect8(
ccl_device_inline KernelGlobals *kg,
int triangle_intersect8(KernelGlobals *kg, Intersection **isect,
Intersection **isect, float3 P,
float3 P, float3 dir,
float3 dir, uint visibility,
uint visibility, int object,
int object, int prim_addr,
int prim_addr, int prim_num,
int prim_num, uint *num_hits,
uint *num_hits, uint max_hits,
uint max_hits, int *num_hits_in_instance,
int *num_hits_in_instance, float isect_t)
float isec_t)
{ {
__m128 tri_a[8], tri_b[8], tri_c[8]; __m128 tri_a[8], tri_b[8], tri_c[8];
__m256 tritmp[12], tri[12]; __m256 tritmp[12], tri[12];
@ -540,7 +466,7 @@ int triangle_intersect8(KernelGlobals *kg,
num_hits, num_hits,
max_hits, max_hits,
num_hits_in_instance, num_hits_in_instance,
isec_t); isect_t);
return result; return result;
} }

@ -34,6 +34,7 @@ struct Main;
struct MemFile; struct MemFile;
struct ReportList; struct ReportList;
struct UserDef; struct UserDef;
struct BlendFileReadParams;
enum { enum {
BKE_BLENDFILE_READ_FAIL = 0, /* no load */ BKE_BLENDFILE_READ_FAIL = 0, /* no load */
@ -43,13 +44,16 @@ enum {
int BKE_blendfile_read( int BKE_blendfile_read(
struct bContext *C, const char *filepath, struct bContext *C, const char *filepath,
struct ReportList *reports, int skip_flag); const struct BlendFileReadParams *params,
struct ReportList *reports);
bool BKE_blendfile_read_from_memory( bool BKE_blendfile_read_from_memory(
struct bContext *C, const void *filebuf, int filelength, struct bContext *C, const void *filebuf, int filelength, bool update_defaults,
struct ReportList *reports, int skip_flag, bool update_defaults); const struct BlendFileReadParams *params,
struct ReportList *reports);
bool BKE_blendfile_read_from_memfile( bool BKE_blendfile_read_from_memfile(
struct bContext *C, struct MemFile *memfile, struct bContext *C, struct MemFile *memfile,
struct ReportList *reports, int skip_flag); const struct BlendFileReadParams *params,
struct ReportList *reports);
void BKE_blendfile_read_make_empty(struct bContext *C); void BKE_blendfile_read_make_empty(struct bContext *C);
struct UserDef *BKE_blendfile_userdef_read( struct UserDef *BKE_blendfile_userdef_read(

@ -55,6 +55,7 @@
#include "BKE_main.h" #include "BKE_main.h"
#include "BLO_undofile.h" #include "BLO_undofile.h"
#include "BLO_readfile.h"
#include "BLO_writefile.h" #include "BLO_writefile.h"
#include "DEG_depsgraph.h" #include "DEG_depsgraph.h"
@ -81,7 +82,10 @@ bool BKE_memfile_undo_decode(MemFileUndoData *mfu, bContext *C)
success = (BKE_blendfile_read(C, mfu->filename, NULL, 0) != BKE_BLENDFILE_READ_FAIL); success = (BKE_blendfile_read(C, mfu->filename, NULL, 0) != BKE_BLENDFILE_READ_FAIL);
} }
else { else {
success = BKE_blendfile_read_from_memfile(C, &mfu->memfile, NULL, 0); success = BKE_blendfile_read_from_memfile(
C, &mfu->memfile,
&(const struct BlendFileReadParams){0},
NULL);
} }
/* Restore, bmain has been re-allocated. */ /* Restore, bmain has been re-allocated. */

@ -114,11 +114,12 @@ static bool wm_scene_is_visible(wmWindowManager *wm, Scene *scene)
*/ */
static void setup_app_data( static void setup_app_data(
bContext *C, BlendFileData *bfd, bContext *C, BlendFileData *bfd,
const char *filepath, ReportList *reports) const char *filepath,
const bool is_startup,
ReportList *reports)
{ {
Main *bmain = G_MAIN; Main *bmain = G_MAIN;
Scene *curscene = NULL; Scene *curscene = NULL;
const bool is_startup = (bfd->filename[0] == '\0');
const bool recover = (G.fileflags & G_FILE_RECOVER) != 0; const bool recover = (G.fileflags & G_FILE_RECOVER) != 0;
enum { enum {
LOAD_UI = 1, LOAD_UI = 1,
@ -314,7 +315,7 @@ static void setup_app_data(
bmain->recovered = 0; bmain->recovered = 0;
/* startup.blend or recovered startup */ /* startup.blend or recovered startup */
if (bfd->filename[0] == 0) { if (is_startup) {
bmain->name[0] = '\0'; bmain->name[0] = '\0';
} }
else if (recover && G.relbase_valid) { else if (recover && G.relbase_valid) {
@ -376,7 +377,8 @@ static int handle_subversion_warning(Main *main, ReportList *reports)
int BKE_blendfile_read( int BKE_blendfile_read(
bContext *C, const char *filepath, bContext *C, const char *filepath,
ReportList *reports, int skip_flags) const struct BlendFileReadParams *params,
ReportList *reports)
{ {
BlendFileData *bfd; BlendFileData *bfd;
int retval = BKE_BLENDFILE_READ_OK; int retval = BKE_BLENDFILE_READ_OK;
@ -386,7 +388,7 @@ int BKE_blendfile_read(
printf("Read blend: %s\n", filepath); printf("Read blend: %s\n", filepath);
} }
bfd = BLO_read_from_file(filepath, reports, skip_flags); bfd = BLO_read_from_file(filepath, params->skip_flags, reports);
if (bfd) { if (bfd) {
if (bfd->user) { if (bfd->user) {
retval = BKE_BLENDFILE_READ_OK_USERPREFS; retval = BKE_BLENDFILE_READ_OK_USERPREFS;
@ -399,7 +401,7 @@ int BKE_blendfile_read(
retval = BKE_BLENDFILE_READ_FAIL; retval = BKE_BLENDFILE_READ_FAIL;
} }
else { else {
setup_app_data(C, bfd, filepath, reports); setup_app_data(C, bfd, filepath, params->is_startup, reports);
} }
} }
else else
@ -409,16 +411,17 @@ int BKE_blendfile_read(
} }
bool BKE_blendfile_read_from_memory( bool BKE_blendfile_read_from_memory(
bContext *C, const void *filebuf, int filelength, bContext *C, const void *filebuf, int filelength, bool update_defaults,
ReportList *reports, int skip_flags, bool update_defaults) const struct BlendFileReadParams *params,
ReportList *reports)
{ {
BlendFileData *bfd; BlendFileData *bfd;
bfd = BLO_read_from_memory(filebuf, filelength, reports, skip_flags); bfd = BLO_read_from_memory(filebuf, filelength, params->skip_flags, reports);
if (bfd) { if (bfd) {
if (update_defaults) if (update_defaults)
BLO_update_defaults_startup_blend(bfd->main, NULL); BLO_update_defaults_startup_blend(bfd->main, NULL);
setup_app_data(C, bfd, "<memory2>", reports); setup_app_data(C, bfd, "<memory2>", params->is_startup, reports);
} }
else { else {
BKE_reports_prepend(reports, "Loading failed: "); BKE_reports_prepend(reports, "Loading failed: ");
@ -430,12 +433,13 @@ bool BKE_blendfile_read_from_memory(
/* memfile is the undo buffer */ /* memfile is the undo buffer */
bool BKE_blendfile_read_from_memfile( bool BKE_blendfile_read_from_memfile(
bContext *C, struct MemFile *memfile, bContext *C, struct MemFile *memfile,
ReportList *reports, int skip_flags) const struct BlendFileReadParams *params,
ReportList *reports)
{ {
Main *bmain = CTX_data_main(C); Main *bmain = CTX_data_main(C);
BlendFileData *bfd; BlendFileData *bfd;
bfd = BLO_read_from_memfile(bmain, BKE_main_blendfile_path(bmain), memfile, reports, skip_flags); bfd = BLO_read_from_memfile(bmain, BKE_main_blendfile_path(bmain), memfile, params->skip_flags, reports);
if (bfd) { if (bfd) {
/* remove the unused screens and wm */ /* remove the unused screens and wm */
while (bfd->main->wm.first) while (bfd->main->wm.first)
@ -443,7 +447,7 @@ bool BKE_blendfile_read_from_memfile(
while (bfd->main->screen.first) while (bfd->main->screen.first)
BKE_libblock_free(bfd->main, bfd->main->screen.first); BKE_libblock_free(bfd->main, bfd->main->screen.first);
setup_app_data(C, bfd, "<memory1>", reports); setup_app_data(C, bfd, "<memory1>", params->is_startup, reports);
} }
else { else {
BKE_reports_prepend(reports, "Loading failed: "); BKE_reports_prepend(reports, "Loading failed: ");
@ -484,7 +488,7 @@ UserDef *BKE_blendfile_userdef_read(const char *filepath, ReportList *reports)
BlendFileData *bfd; BlendFileData *bfd;
UserDef *userdef = NULL; UserDef *userdef = NULL;
bfd = BLO_read_from_file(filepath, reports, BLO_READ_SKIP_ALL & ~BLO_READ_SKIP_USERDEF); bfd = BLO_read_from_file(filepath, BLO_READ_SKIP_ALL & ~BLO_READ_SKIP_USERDEF, reports);
if (bfd) { if (bfd) {
if (bfd->user) { if (bfd->user) {
userdef = bfd->user; userdef = bfd->user;
@ -504,7 +508,10 @@ UserDef *BKE_blendfile_userdef_read_from_memory(
BlendFileData *bfd; BlendFileData *bfd;
UserDef *userdef = NULL; UserDef *userdef = NULL;
bfd = BLO_read_from_memory(filebuf, filelength, reports, BLO_READ_SKIP_ALL & ~BLO_READ_SKIP_USERDEF); bfd = BLO_read_from_memory(
filebuf, filelength,
BLO_READ_SKIP_ALL & ~BLO_READ_SKIP_USERDEF,
reports);
if (bfd) { if (bfd) {
if (bfd->user) { if (bfd->user) {
userdef = bfd->user; userdef = bfd->user;
@ -567,10 +574,10 @@ WorkspaceConfigFileData *BKE_blendfile_workspace_config_read(const char *filepat
WorkspaceConfigFileData *workspace_config = NULL; WorkspaceConfigFileData *workspace_config = NULL;
if (filepath) { if (filepath) {
bfd = BLO_read_from_file(filepath, reports, BLO_READ_SKIP_USERDEF); bfd = BLO_read_from_file(filepath, BLO_READ_SKIP_USERDEF, reports);
} }
else { else {
bfd = BLO_read_from_memory(filebuf, filelength, reports, BLO_READ_SKIP_USERDEF); bfd = BLO_read_from_memory(filebuf, filelength, BLO_READ_SKIP_USERDEF, reports);
} }
if (bfd) { if (bfd) {

@ -81,6 +81,10 @@ typedef struct WorkspaceConfigFileData {
struct ListBase workspaces; struct ListBase workspaces;
} WorkspaceConfigFileData; } WorkspaceConfigFileData;
struct BlendFileReadParams {
uint skip_flags : 2; /* eBLOReadSkip */
uint is_startup : 1;
};
/* skip reading some data-block types (may want to skip screen data too). */ /* skip reading some data-block types (may want to skip screen data too). */
typedef enum eBLOReadSkip { typedef enum eBLOReadSkip {
@ -93,13 +97,16 @@ typedef enum eBLOReadSkip {
BlendFileData *BLO_read_from_file( BlendFileData *BLO_read_from_file(
const char *filepath, const char *filepath,
struct ReportList *reports, eBLOReadSkip skip_flag); eBLOReadSkip skip_flags,
struct ReportList *reports);
BlendFileData *BLO_read_from_memory( BlendFileData *BLO_read_from_memory(
const void *mem, int memsize, const void *mem, int memsize,
struct ReportList *reports, eBLOReadSkip skip_flag); eBLOReadSkip skip_flags,
struct ReportList *reports);
BlendFileData *BLO_read_from_memfile( BlendFileData *BLO_read_from_memfile(
struct Main *oldmain, const char *filename, struct MemFile *memfile, struct Main *oldmain, const char *filename, struct MemFile *memfile,
struct ReportList *reports, eBLOReadSkip skip_flag); eBLOReadSkip skip_flags,
struct ReportList *reports);
void BLO_blendfiledata_free(BlendFileData *bfd); void BLO_blendfiledata_free(BlendFileData *bfd);

@ -319,7 +319,8 @@ void BLO_blendhandle_close(BlendHandle *bh)
*/ */
BlendFileData *BLO_read_from_file( BlendFileData *BLO_read_from_file(
const char *filepath, const char *filepath,
ReportList *reports, eBLOReadSkip skip_flags) eBLOReadSkip skip_flags,
ReportList *reports)
{ {
BlendFileData *bfd = NULL; BlendFileData *bfd = NULL;
FileData *fd; FileData *fd;
@ -346,7 +347,8 @@ BlendFileData *BLO_read_from_file(
*/ */
BlendFileData *BLO_read_from_memory( BlendFileData *BLO_read_from_memory(
const void *mem, int memsize, const void *mem, int memsize,
ReportList *reports, eBLOReadSkip skip_flags) eBLOReadSkip skip_flags,
ReportList *reports)
{ {
BlendFileData *bfd = NULL; BlendFileData *bfd = NULL;
FileData *fd; FileData *fd;
@ -370,7 +372,8 @@ BlendFileData *BLO_read_from_memory(
*/ */
BlendFileData *BLO_read_from_memfile( BlendFileData *BLO_read_from_memfile(
Main *oldmain, const char *filename, MemFile *memfile, Main *oldmain, const char *filename, MemFile *memfile,
ReportList *reports, eBLOReadSkip skip_flags) eBLOReadSkip skip_flags,
ReportList *reports)
{ {
BlendFileData *bfd = NULL; BlendFileData *bfd = NULL;
FileData *fd; FileData *fd;

@ -131,7 +131,7 @@ void memfile_chunk_add(
struct Main *BLO_memfile_main_get(struct MemFile *memfile, struct Main *oldmain, struct Scene **r_scene) struct Main *BLO_memfile_main_get(struct MemFile *memfile, struct Main *oldmain, struct Scene **r_scene)
{ {
struct Main *bmain_undo = NULL; struct Main *bmain_undo = NULL;
BlendFileData *bfd = BLO_read_from_memfile(oldmain, BKE_main_blendfile_path(oldmain), memfile, NULL, BLO_READ_SKIP_NONE); BlendFileData *bfd = BLO_read_from_memfile(oldmain, BKE_main_blendfile_path(oldmain), memfile, BLO_READ_SKIP_NONE, NULL);
if (bfd) { if (bfd) {
bmain_undo = bfd->main; bmain_undo = bfd->main;

@ -207,7 +207,7 @@ static Main *load_main_from_memory(const void *blend, int blend_size)
BlendFileData *bfd; BlendFileData *bfd;
G.fileflags |= G_FILE_NO_UI; G.fileflags |= G_FILE_NO_UI;
bfd = BLO_read_from_memory(blend, blend_size, NULL, BLO_READ_SKIP_NONE); bfd = BLO_read_from_memory(blend, blend_size, BLO_READ_SKIP_NONE, NULL);
if (bfd) { if (bfd) {
bmain = bfd->main; bmain = bfd->main;

@ -594,7 +594,10 @@ bool WM_file_read(bContext *C, const char *filepath, ReportList *reports)
/* confusing this global... */ /* confusing this global... */
G.relbase_valid = 1; G.relbase_valid = 1;
retval = BKE_blendfile_read(C, filepath, reports, 0); retval = BKE_blendfile_read(
C, filepath,
&(const struct BlendFileReadParams){0},
reports);
/* BKE_file_read sets new Main into context. */ /* BKE_file_read sets new Main into context. */
Main *bmain = CTX_data_main(C); Main *bmain = CTX_data_main(C);
@ -865,7 +868,13 @@ int wm_homefile_read(
if (!use_factory_settings || (filepath_startup[0] != '\0')) { if (!use_factory_settings || (filepath_startup[0] != '\0')) {
if (BLI_access(filepath_startup, R_OK) == 0) { if (BLI_access(filepath_startup, R_OK) == 0) {
success = (BKE_blendfile_read(C, filepath_startup, NULL, skip_flags) != BKE_BLENDFILE_READ_FAIL); success = BKE_blendfile_read(
C, filepath_startup,
&(const struct BlendFileReadParams){
.is_startup = true,
.skip_flags = skip_flags,
},
NULL) != BKE_BLENDFILE_READ_FAIL;
} }
if (BLI_listbase_is_empty(&U.themes)) { if (BLI_listbase_is_empty(&U.themes)) {
if (G.debug & G_DEBUG) if (G.debug & G_DEBUG)
@ -884,8 +893,12 @@ int wm_homefile_read(
if (success == false) { if (success == false) {
success = BKE_blendfile_read_from_memory( success = BKE_blendfile_read_from_memory(
C, datatoc_startup_blend, datatoc_startup_blend_size, C, datatoc_startup_blend, datatoc_startup_blend_size, true,
NULL, skip_flags, true); &(const struct BlendFileReadParams){
.is_startup = true,
.skip_flags = skip_flags,
},
NULL);
if (success) { if (success) {
if (use_userdef) { if (use_userdef) {
if ((skip_flags & BLO_READ_SKIP_USERDEF) == 0) { if ((skip_flags & BLO_READ_SKIP_USERDEF) == 0) {