/*
 * Copyright 2011-2013 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 */

/* Compile-time selection of CPU kernel variants and SSE intrinsics headers.
 *
 * This header only defines preprocessor flags and includes intrinsics
 * headers; it declares no functions or types of its own. Everything is
 * skipped when __KERNEL_GPU__ is defined. */

#ifndef __UTIL_OPTIMIZATION_H__
#define __UTIL_OPTIMIZATION_H__

#ifndef __KERNEL_GPU__

/* x86
 *
 * Compile a regular, SSE2 and SSE3 kernel. */

#if defined(i386) || defined(_M_IX86)

#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3

#endif

/* x86-64
 *
 * Compile a regular (includes SSE2), SSE3 and SSE 4.1 kernel. */

#if defined(__x86_64__) || defined(_M_X64)

/* SSE2 is always available on x86-64 CPUs, so auto enable.
 * Guarded so that a definition supplied earlier (for example on the
 * compiler command line) is not redefined with a different replacement. */
#ifndef __KERNEL_SSE2__
#define __KERNEL_SSE2__
#endif

/* no SSE2 kernel on x86-64, part of regular kernel */
#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
#define WITH_CYCLES_OPTIMIZED_KERNEL_SSE41

/* VC2008 is not ready for sse41, probably broken blendv intrinsic...
 * The SSE 4.1 kernel is dropped for every MSVC with _MSC_VER below 1700. */
#if defined(_MSC_VER) && (_MSC_VER < 1700)
#undef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
#endif

#endif

/* SSE Experiment
 *
 * This is disabled code for an experiment to use SSE types globally for types
 * such as float3 and float4. Currently this gives an overall slowdown. */

#if 0
#define __KERNEL_SSE__
#ifndef __KERNEL_SSE2__
#define __KERNEL_SSE2__
#endif
#ifndef __KERNEL_SSE3__
#define __KERNEL_SSE3__
#endif
#ifndef __KERNEL_SSSE3__
#define __KERNEL_SSSE3__
#endif
#ifndef __KERNEL_SSE4__
#define __KERNEL_SSE4__
#endif
/* The include section below tests __KERNEL_SSE41__, not __KERNEL_SSE4__, so
 * define it as well; otherwise enabling this experiment would not pull in the
 * SSE 4.1 header. __KERNEL_SSE4__ is kept in case other code tests it. */
#ifndef __KERNEL_SSE41__
#define __KERNEL_SSE41__
#endif
#endif

/* SSE Intrinsics includes
 *
 * We assume __KERNEL_SSEX__ flags to have been defined at this point */

/* SSE intrinsics headers */
#ifndef FREE_WINDOWS64

#ifdef __KERNEL_SSE2__
#include <xmmintrin.h> /* SSE 1 */
#include <emmintrin.h> /* SSE 2 */
#endif

#ifdef __KERNEL_SSE3__
#include <pmmintrin.h> /* SSE 3 */
#endif

#ifdef __KERNEL_SSSE3__
#include <tmmintrin.h> /* SSSE 3 */
#endif

#ifdef __KERNEL_SSE41__
#include <smmintrin.h> /* SSE 4.1 */
#endif

#else

/* MinGW64 has conflicting declarations for these SSE headers in <windows.h>.
 * Since we can't avoid including <windows.h>, better only include that */
#include <windows.h>

#endif

#endif /* __KERNEL_GPU__ */

#endif /* __UTIL_OPTIMIZATION_H__ */