forked from bartvdbraak/blender
6046a86522
Brings support of NVRTC bindings and also makes it easier to tweak libraries paths and use multiple alternative names for libraries.
641 lines
19 KiB
Python
641 lines
19 KiB
Python
#!/usr/bin/env python3
|
|
#
|
|
# Copyright 2014 Blender Foundation
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License
|
|
|
|
# This script generates either header or implementation file from
|
|
# CUDA header files.
|
|
#
|
|
# Usage: cuew hdr|impl [/path/to/cuda/includes]
|
|
# - hdr means header file will be generated and printed to stdout.
|
|
# - impl means implementation file will be generated and printed to stdout.
|
|
# - /path/to/cuda/includes is a path to a folder with cuda.h and cudaGL.h
|
|
# for which wrangler will be generated.
|
|
|
|
import os
|
|
import sys
|
|
from cuda_errors import CUDA_ERRORS
|
|
from pycparser import c_parser, c_ast, parse_file
|
|
from subprocess import Popen, PIPE
|
|
|
|
# Default folder that is searched for the CUDA headers; the entry point
# replaces it with the optional second command line argument.
INCLUDE_DIR = "/usr/include"

# Name of the generated wrangler and of the library it wraps.
LIB = "CUEW"
REAL_LIB = "CUDA"

# Version numbers written into the generated header.
VERSION_MAJOR = "1"
VERSION_MINOR = "2"

# License banner printed at the top of every generated file.
COPYRIGHT = """/*
* Copyright 2011-2014 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/"""

# Headers (relative to INCLUDE_DIR) which are parsed by parse_files().
FILES = ["cuda.h", "cudaGL.h", "nvrtc.h"]

# Accumulators filled while parsing and consumed by the print_* functions.
TYPEDEFS = []
FUNC_TYPEDEFS = []
SYMBOLS = []
DEFINES = []
DEFINES_V2 = []
ERRORS = []
|
|
|
|
|
|
class FuncDefVisitor(c_ast.NodeVisitor):
    """AST visitor which collects typedefs and function declarations.

    Visited ``Typedef`` nodes are turned into C typedef strings appended to
    the module-level ``TYPEDEFS`` list.  Visited function declarations are
    turned into ``typedef <ret> CUDAAPI t<name>(<args>);`` strings appended
    to ``FUNC_TYPEDEFS``, with the function name appended to ``SYMBOLS``.
    Enumerator names starting with ``CUDA_ERROR_`` are appended to ``ERRORS``.
    """

    # Current nesting depth used when stringifying struct/union/enum bodies.
    indent = 0
    # Whether the previously emitted typedef was a struct or an enum.
    prev_complex = False
    # Default typedef names which are never emitted.
    dummy_typedefs = ['size_t', 'CUdeviceptr']

    def __init__(self):
        super().__init__()
        # Callers append extra names to ``dummy_typedefs`` of an instance.
        # Give every instance its own copy so those additions do not leak
        # into the class attribute (and from there into later instances).
        self.dummy_typedefs = list(type(self).dummy_typedefs)

    def _get_quals_string(self, node):
        """Return qualifiers of *node* joined by spaces, with trailing space."""
        if node.quals:
            return ' '.join(node.quals) + ' '
        return ''

    def _get_ident_type(self, node):
        """Return the C spelling of the type described by *node*."""
        if isinstance(node, c_ast.PtrDecl):
            if isinstance(node.type, c_ast.PtrDecl):
                # Pointer to pointer: recurse on the inner pointer so every
                # level of indirection contributes its own '*'.
                return self._get_ident_type(node.type) + '*'
            return self._get_ident_type(node.type.type) + '*'
        if isinstance(node, c_ast.ArrayDecl):
            return self._get_ident_type(node.type)
        elif isinstance(node, c_ast.Struct):
            if node.name:
                return 'struct ' + node.name
            else:
                # Anonymous struct: spell out its members inline.
                self.indent += 1
                struct = self._stringify_struct(node)
                self.indent -= 1
                return "struct {\n" + \
                       struct + (" " * self.indent) + "}"
        elif isinstance(node, c_ast.Union):
            self.indent += 1
            union = self._stringify_struct(node)
            self.indent -= 1
            return "union {\n" + union + (" " * self.indent) + "}"
        elif isinstance(node, c_ast.Enum):
            if node.name is not None:
                return 'enum ' + node.name
            else:
                return 'enum '
        elif isinstance(node, c_ast.TypeDecl):
            return self._get_ident_type(node.type)
        else:
            return node.names[0]

    def _stringify_param(self, param):
        """Return ``<quals><type> <name>[<dim>]`` for a declaration node."""
        param_type = param.type
        result = self._get_quals_string(param)
        result += self._get_ident_type(param_type)
        if param.name:
            result += ' ' + param.name
        if isinstance(param_type, c_ast.ArrayDecl):
            # TODO(sergey): Workaround to deal with the
            # preprocessed file where array size got
            # substituted.
            dim = param_type.dim.value
            if param.name == "reserved" and dim == "64":
                dim = "CU_IPC_HANDLE_SIZE"
            result += '[' + dim + ']'
        return result

    def _stringify_params(self, params):
        """Return the comma separated spelling of all *params*."""
        return ', '.join(self._stringify_param(param) for param in params)

    def _stringify_struct(self, node):
        """Return one indented ``<member>;`` line per child of *node*."""
        result = ""
        for child in node.children():
            member = self._stringify_param(child[1])
            result += (" " * self.indent) + member + ";\n"
        return result

    def _stringify_enum(self, node):
        """Return one indented ``NAME[ = value],`` line per enumerator.

        Enumerators named ``CUDA_ERROR_*`` are also recorded in ``ERRORS``.
        """
        result = ""
        for child in node.children():
            if isinstance(child[1], c_ast.EnumeratorList):
                for enumerator in child[1].enumerators:
                    result += (" " * self.indent) + enumerator.name
                    if enumerator.value:
                        result += " = " + enumerator.value.value
                    result += ",\n"
                    if enumerator.name.startswith("CUDA_ERROR_"):
                        ERRORS.append(enumerator.name)
        return result

    def visit_Decl(self, node):
        """Record a function typedef and symbol for function declarations."""
        if not isinstance(node.type, c_ast.FuncDecl):
            return

        func_decl = node.type
        func_decl_type = func_decl.type

        typedef = 'typedef '
        symbol_name = None

        if isinstance(func_decl_type, c_ast.TypeDecl):
            symbol_name = func_decl_type.declname
            typedef += self._get_quals_string(func_decl_type)
            typedef += self._get_ident_type(func_decl_type.type)
            typedef += ' CUDAAPI'
            typedef += ' t' + symbol_name
        elif isinstance(func_decl_type, c_ast.PtrDecl):
            # Function returning a pointer: the name and the qualifiers
            # live on the declaration wrapped by the pointer.
            ptr_type = func_decl_type.type
            symbol_name = ptr_type.declname
            typedef += self._get_quals_string(ptr_type)
            typedef += self._get_ident_type(func_decl_type)
            typedef += ' CUDAAPI'
            typedef += ' t' + symbol_name

        # A prototype written as ``f()`` has no parameter list node at all.
        params = func_decl.args.params if func_decl.args else []
        typedef += '(' + self._stringify_params(params) + ');'

        SYMBOLS.append(symbol_name)
        FUNC_TYPEDEFS.append(typedef)

    def visit_Typedef(self, node):
        """Record the C spelling of a typedef unless it is a dummy one."""
        if node.name in self.dummy_typedefs:
            return

        is_complex = False
        ident_type = self._get_ident_type(node.type)
        quals = self._get_quals_string(node)

        if isinstance(node.type.type, c_ast.Struct):
            self.indent += 1
            struct = self._stringify_struct(node.type.type)
            self.indent -= 1
            typedef = quals + ident_type + " {\n" + struct + "} " + node.name
            is_complex = True
        elif isinstance(node.type.type, c_ast.Enum):
            self.indent += 1
            enum = self._stringify_enum(node.type.type)
            self.indent -= 1
            typedef = quals + ident_type + " {\n" + enum + "} " + node.name
            is_complex = True
        else:
            typedef = quals + ident_type + " " + node.name

        # Separate struct/enum typedefs from their neighbours by an
        # empty line.
        if is_complex or self.prev_complex:
            typedef = "\ntypedef " + typedef + ";"
        else:
            typedef = "typedef " + typedef + ";"

        TYPEDEFS.append(typedef)

        self.prev_complex = is_complex
|
|
|
|
|
|
def get_latest_cpp():
    """Return the path of the C preprocessor to use, or None.

    The versioned ``/usr/bin/cpp-4.9`` ... ``/usr/bin/cpp-4.4`` binaries are
    preferred, newest first.  When none of them exists the plain ``cpp``
    found on ``PATH`` is used instead, so the script keeps working on
    systems which do not ship those versioned binaries.  None is returned
    only when no preprocessor could be located at all.
    """
    import shutil

    path_prefix = "/usr/bin"
    for cpp_version in ["9", "8", "7", "6", "5", "4"]:
        test_cpp = os.path.join(path_prefix, "cpp-4." + cpp_version)
        if os.path.exists(test_cpp):
            return test_cpp
    return shutil.which("cpp")
|
|
|
|
|
|
def preprocess_file(filename, cpp_path):
    """Run the C preprocessor on *filename* and return its standard output.

    *cpp_path* is the preprocessor executable.  For files whose name ends
    with ``GL.h`` the ``CUDAAPI`` macro is defined on the command line.

    Raises RuntimeError when no preprocessor was given or when it could not
    be started.
    """
    if cpp_path is None:
        # Passing None to Popen would fail with an unrelated TypeError,
        # report the missing preprocessor the same way as a failed launch.
        raise RuntimeError("Unable to invoke 'cpp'. " +
                           'Make sure its path was passed correctly\n' +
                           'Original error: no preprocessor path given')

    args = [cpp_path, "-I./"]
    if filename.endswith("GL.h"):
        args.append("-DCUDAAPI= ")
    args.append(filename)

    try:
        pipe = Popen(args,
                     stdout=PIPE,
                     universal_newlines=True)
        text = pipe.communicate()[0]
    except OSError as e:
        raise RuntimeError("Unable to invoke 'cpp'. " +
                           'Make sure its path was passed correctly\n' +
                           ('Original error: %s' % e)) from e

    return text
|
|
|
|
|
|
def parse_files():
    """Parse every header of FILES and fill the module-level accumulators.

    Each header is preprocessed and parsed into an AST which is walked by
    FuncDefVisitor.  The raw header is also scanned line by line: top-level
    ``#define`` lines go to DEFINES and indented ``#define`` lines which map
    a name to its ``_v2`` variant go to DEFINES_V2.  An empty entry is
    appended to FUNC_TYPEDEFS and SYMBOLS after every file.
    """
    c_source_parser = c_parser.CParser()
    cpp_path = get_latest_cpp()

    # Macros of the original headers which are not copied to DEFINES.
    skipped_defines = ("__cuda_cuda_h__",
                       "CUDA_CB",
                       "CUDAAPI",
                       "CUDAGL_H",
                       "__NVRTC_H__")

    for filename in FILES:
        filepath = os.path.join(INCLUDE_DIR, filename)
        text = preprocess_file(filepath, cpp_path)
        dummy_typedefs = {}

        if filepath.endswith("GL.h"):
            # The GL header is parsed on its own, so the types it relies
            # on are declared up front and skipped by the visitor.
            dummy_typedefs = {
                "CUresult": "int",
                "CUgraphicsResource": "void *",
                "CUdevice": "void *",
                "CUcontext": "void *",
                "CUdeviceptr": "void *",
                "CUstream": "void *"
            }

            text = ("typedef long size_t;\n"
                    "typedef unsigned int GLenum;\n"
                    "typedef unsigned int GLuint;\n"
                    "typedef int GLint;\n") + text

        # Every declaration is put in front of the text in sorted order,
        # which leaves them in reverse sorted order in the final text.
        for name in sorted(dummy_typedefs):
            declaration = "typedef " + dummy_typedefs[name] + " " + name + ";\n"
            text = declaration + text

        ast = c_source_parser.parse(text, filepath)

        with open(filepath) as header:
            lines = header.readlines()

        for line in lines:
            if line.startswith("#define"):
                # Strip "#define " and the trailing newline.
                token = line[8:-1].split()
                if token[0] not in skipped_defines:
                    DEFINES.append(token)
            # TODO(sergey): Use better matching rule for _v2 symbols.
            elif line[0].isspace() and line.lstrip().startswith("#define"):
                token = line[12:-1].split()
                if len(token) != 2:
                    continue
                if not token[1].endswith(("_v2", "_v2)")):
                    continue
                if token[1].startswith(('__CUDA_API_PTDS',
                                        '__CUDA_API_PTSZ')):
                    # Unwrap the name from the macro call around it.
                    token[1] = token[1][16:-1]
                DEFINES_V2.append(token)

        visitor = FuncDefVisitor()
        visitor.dummy_typedefs.extend(dummy_typedefs)
        visitor.visit(ast)

        FUNC_TYPEDEFS.append('')
        SYMBOLS.append('')
|
|
|
|
|
|
def print_copyright():
    """Print the license banner followed by an empty line."""
    print(COPYRIGHT + "\n")
|
|
|
|
|
|
def open_header_guard():
    """Print the opening include guard and the C++ linkage opener."""
    guard = "__%s_H__" % (LIB)
    opening = (
        "#ifndef " + guard,
        "#define " + guard,
        "",
        "#ifdef __cplusplus",
        "extern \"C\" {",
        "#endif",
        "",
    )
    for line in opening:
        print(line)
|
|
|
|
|
|
def close_header_guard():
    """Print the C++ linkage closer and the closing include guard."""
    closing = (
        "",
        "#ifdef __cplusplus",
        "}",
        "#endif",
        "",
        "#endif /* __%s_H__ */" % (LIB),
    )
    for line in closing:
        print(line)
|
|
|
|
|
|
def print_header():
    """Print the generated header file to stdout.

    The header consists of the license banner, the include guard, the
    collected defines, types, function typedefs and function pointer
    declarations, followed by the wrangler's own public API.
    """
    print_copyright()
    open_header_guard()

    # For size_t.
    print("#include <stdlib.h>")
    print("")

    print("/* Defines. */")
    print("#define " + LIB + "_VERSION_MAJOR " + VERSION_MAJOR)
    print("#define " + LIB + "_VERSION_MINOR " + VERSION_MINOR)
    print("")
    for tokens in DEFINES:
        print('#define ' + ' '.join(tokens))
    print("")

    print("""/* Functions which changed 3.1 -> 3.2 for 64 bit stuff,
* the cuda library has both the old ones for compatibility and new
* ones with _v2 postfix,
*/""")
    for tokens in DEFINES_V2:
        print('#define ' + ' '.join(tokens))
    print("")

    print("/* Types. */")

    # We handle this specially because the file is
    # getting preprocessed.
    print("""#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif
""")

    for type_definition in TYPEDEFS:
        print('%s' % (type_definition))

    # TODO(sergey): This is only specific to CUDA wrapper.
    print("""
#ifdef _WIN32
# define CUDAAPI __stdcall
# define CUDA_CB __stdcall
#else
# define CUDAAPI
# define CUDA_CB
#endif
""")

    print("/* Function types. */")
    for function_type in FUNC_TYPEDEFS:
        print('%s' % (function_type))
    print("")

    print("/* Function declarations. */")
    for name in SYMBOLS:
        # Empty entries separate the symbols of different headers.
        print('extern t%s *%s;' % (name, name) if name else "")

    for line in ("",
                 "enum {",
                 " CUEW_SUCCESS = 0,",
                 " CUEW_ERROR_OPEN_FAILED = -1,",
                 " CUEW_ERROR_ATEXIT_FAILED = -2,",
                 "};",
                 ""):
        print(line)

    prefix = LIB.lower()
    print("int %sInit(void);" % (prefix))
    # TODO(sergey): Get rid of hardcoded CUresult.
    print("const char *%sErrorString(CUresult result);" % (prefix))
    print("const char *cuewCompilerPath(void);")
    print("int cuewCompilerVersion(void);")

    close_header_guard()
|
|
|
|
|
|
def print_dl_wrapper():
    """Print the platform abstraction used to load dynamic libraries.

    The printed C code defines the DynamicLibrary type together with the
    dynamic_library_open/close/find macros for Windows and for platforms
    which provide <dlfcn.h>.
    """
    wrapper_lines = (
        "#ifdef _WIN32",
        "# define WIN32_LEAN_AND_MEAN",
        "# define VC_EXTRALEAN",
        "# include <windows.h>",
        "",
        "/* Utility macros. */",
        "",
        "typedef HMODULE DynamicLibrary;",
        "",
        "# define dynamic_library_open(path) LoadLibraryA(path)",
        "# define dynamic_library_close(lib) FreeLibrary(lib)",
        "# define dynamic_library_find(lib, symbol) GetProcAddress(lib, symbol)",
        "#else",
        "# include <dlfcn.h>",
        "",
        "typedef void* DynamicLibrary;",
        "",
        "# define dynamic_library_open(path) dlopen(path, RTLD_NOW)",
        "# define dynamic_library_close(lib) dlclose(lib)",
        "# define dynamic_library_find(lib, symbol) dlsym(lib, symbol)",
        "#endif",
        "",
    )
    print("\n".join(wrapper_lines))
|
|
|
|
|
|
def print_dl_helper_macro():
    """Print the symbol lookup macros and the library handle variables.

    The library specific macros are named after REAL_LIB and look symbols
    up in cuda_lib, the NVRTC ones look them up in nvrtc_lib.
    """
    template = """#define _LIBRARY_FIND_CHECKED(lib, name) \\
name = (t##name *)dynamic_library_find(lib, #name); \\
assert(name);

#define _LIBRARY_FIND(lib, name) \\
name = (t##name *)dynamic_library_find(lib, #name);

#define %s_LIBRARY_FIND_CHECKED(name) \\
_LIBRARY_FIND_CHECKED(cuda_lib, name)
#define %s_LIBRARY_FIND(name) _LIBRARY_FIND(cuda_lib, name)

#define NVRTC_LIBRARY_FIND_CHECKED(name) \\
_LIBRARY_FIND_CHECKED(nvrtc_lib, name)
#define NVRTC_LIBRARY_FIND(name) _LIBRARY_FIND(nvrtc_lib, name)

static DynamicLibrary cuda_lib;
static DynamicLibrary nvrtc_lib;"""
    macro_prefixes = (REAL_LIB, REAL_LIB)
    print(template % macro_prefixes)
    print("")
|
|
|
|
|
|
def print_dl_helpers():
    """Print the C helpers which open and release the dynamic libraries.

    ``dynamic_library_open_find`` returns the first library of a
    NULL-terminated list of paths which could be opened.  The exit handler,
    named after LIB, closes every library handle which was opened: both
    cuda_lib and nvrtc_lib, so the optional NVRTC library is not leaked.
    """
    print("""static DynamicLibrary dynamic_library_open_find(const char **paths) {
int i = 0;
while (paths[i] != NULL) {
DynamicLibrary lib = dynamic_library_open(paths[i]);
if (lib != NULL) {
return lib;
}
++i;
}
return NULL;
}

static void %sExit(void) {
if(cuda_lib != NULL) {
/* Ignore errors. */
dynamic_library_close(cuda_lib);
cuda_lib = NULL;
}
if(nvrtc_lib != NULL) {
/* Ignore errors. */
dynamic_library_close(nvrtc_lib);
nvrtc_lib = NULL;
}
}""" % (LIB.lower()))
    print("")
|
|
|
|
|
|
def print_lib_path():
    """Print the per-platform lists of library paths to try.

    The printed C code declares the NULL-terminated cuda_paths and
    nvrtc_paths arrays for Windows, macOS and all other platforms.
    """
    # TODO(sergey): get rid of hardcoded libraries.
    path_lines = (
        "#ifdef _WIN32",
        "/* Expected in c:/windows/system or similar, no path needed. */",
        "const char *cuda_paths[] = {\"nvcuda.dll\", NULL};",
        "const char *nvrtc_paths[] = {\"nvrtc.dll\", NULL};",
        "#elif defined(__APPLE__)",
        "/* Default installation path. */",
        "const char *cuda_paths[] = {\"/usr/local/cuda/lib/libcuda.dylib\", NULL};",
        "const char *nvrtc_paths[] = {\"/usr/local/cuda/lib/libnvrtc.dylib\", NULL};",
        "#else",
        "const char *cuda_paths[] = {\"libcuda.so\", NULL};",
        "const char *nvrtc_paths[] = {\"libnvrtc.so\",",
        "# if defined(__x86_64__) || defined(_M_X64)",
        "\"/usr/local/cuda/lib64/libnvrtc.so\",",
        "#else",
        "\"/usr/local/cuda/lib/libnvrtc.so\",",
        "#endif",
        "NULL};",
        "#endif",
    )
    print("\n".join(path_lines))
|
|
|
|
|
|
def print_init_guard():
    """Print the first part of the body of the init function.

    The printed C code returns the cached result on repeated calls,
    registers cuewExit with atexit() and opens both libraries, failing
    when the CUDA library could not be opened.
    """
    guard_code = """ static int initialized = 0;
static int result = 0;
int error, driver_version;

if (initialized) {
return result;
}

initialized = 1;

error = atexit(cuewExit);
if (error) {
result = CUEW_ERROR_ATEXIT_FAILED;
return result;
}

/* Load library. */
cuda_lib = dynamic_library_open_find(cuda_paths);
nvrtc_lib = dynamic_library_open_find(nvrtc_paths);

/* CUDA library is mandatory to have, while nvrtc might be missing. */
if (cuda_lib == NULL) {
result = CUEW_ERROR_OPEN_FAILED;
return result;
}"""
    # The trailing newline stands for the empty separator line.
    print(guard_code + "\n")
|
|
|
|
|
|
def print_driver_version_guard():
    """Print the part of the init function which checks the driver version.

    The printed C code looks up cuDriverGetVersion and makes the init
    function fail when the reported version is lower than 4000.
    """
    # TODO(sergey): Currently it's hardcoded for CUDA only.
    version_check = """ /* Detect driver version. */
driver_version = 1000;

%s_LIBRARY_FIND_CHECKED(cuDriverGetVersion);
if (cuDriverGetVersion) {
cuDriverGetVersion(&driver_version);
}

/* We require version 4.0. */
if (driver_version < 4000) {
result = CUEW_ERROR_OPEN_FAILED;
return result;
}"""
    print(version_check % (REAL_LIB))
|
|
|
|
|
|
def print_dl_init():
    """Print the definition of the init function of the wrangler.

    Symbols whose name starts with ``nvrtc`` are looked up in the NVRTC
    library, and only when that library was opened.  All other symbols are
    looked up in the CUDA library.
    """
    print("int %sInit(void) {" % (LIB.lower()))

    print(" /* Library paths. */")
    print_lib_path()
    print_init_guard()
    print_driver_version_guard()

    print(" /* Fetch all function pointers. */")
    for name in SYMBOLS:
        if not name:
            # Empty entries separate the symbols of different headers.
            print("")
            continue
        if name.startswith('nvrtc'):
            continue
        print(" %s_LIBRARY_FIND(%s);" % (REAL_LIB, name))

    print(" if (nvrtc_lib != NULL) {")
    nvrtc_symbols = [name for name in SYMBOLS
                     if name and name.startswith('nvrtc')]
    for name in nvrtc_symbols:
        print(" NVRTC_LIBRARY_FIND(%s);" % (name))
    print(" }")

    print("")
    print(" result = CUEW_SUCCESS;")
    print(" return result;")

    print("}")
|
|
|
|
|
|
def print_implementation():
    """Print the generated implementation file to stdout.

    The file consists of the license banner, includes, the dynamic library
    helpers, the function pointer definitions, the init function, the error
    string function and the extra code provided by the cuda_extra module.
    """
    print_copyright()

    # TODO(sergey): Get rid of hardcoded header.
    print("""#ifdef _MSC_VER
# define snprintf _snprintf
# define popen _popen
# define pclose _pclose
# define _CRT_SECURE_NO_WARNINGS
#endif
""")
    for header in ("cuew.h", "assert.h", "stdio.h", "string.h", "sys/stat.h"):
        print("#include <" + header + ">")
    print("")

    print_dl_wrapper()
    print_dl_helper_macro()

    print("/* Function definitions. */")
    for name in SYMBOLS:
        # Empty entries separate the symbols of different headers.
        print('t%s *%s;' % (name, name) if name else "")
    print("")

    print_dl_helpers()

    print("/* Implementation function. */")
    print_dl_init()

    print("")
    # TODO(sergey): Get rid of hardcoded CUresult.
    print("const char *%sErrorString(CUresult result) {" % (LIB.lower()))
    print(" switch(result) {")
    print(" case CUDA_SUCCESS: return \"No errors\";")

    for error in ERRORS:
        if error in CUDA_ERRORS:
            message = CUDA_ERRORS[error]
        else:
            # Derive a message from the name: drop the "CUDA_ERROR_"
            # prefix and keep only the first letter in upper case.
            words = error[11:].replace('_', ' ')
            message = words[0] + words[1:].lower()
        print(" case %s: return \"%s\";" % (error, message))

    print(" default: return \"Unknown CUDA error value\";")
    print(" }")
    print("}")

    from cuda_extra import extra_code
    print(extra_code)
|
|
|
|
if __name__ == "__main__":
    # Entry point: cuew hdr|impl [/path/to/cuda/toolkit/include]
    #
    # Diagnostics go to stderr because stdout carries the generated file,
    # and sys.exit() is used since the exit() builtin is only provided by
    # the site module.

    if len(sys.argv) not in (2, 3):
        print("Usage: %s hdr|impl [/path/to/cuda/toolkit/include]" %
              (sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    # Reject unknown commands before the headers are parsed.
    if sys.argv[1] not in ("hdr", "impl"):
        print("Unknown command %s" % (sys.argv[1]), file=sys.stderr)
        sys.exit(1)

    if len(sys.argv) == 3:
        INCLUDE_DIR = sys.argv[2]

    parse_files()

    if sys.argv[1] == "hdr":
        print_header()
    else:
        print_implementation()
|