1import numpy as np 

2 

3from pystencils.backends.cbackend import get_headers 

4from pystencils.backends.opencl_backend import generate_opencl 

5from pystencils.gpucuda.cudajit import _build_numpy_argument_list, _check_arguments 

6from pystencils.include import get_pystencils_include_path 

7from pystencils.kernel_wrapper import KernelWrapper 

8 

# When True, aggressive floating-point optimization flags (-cl-unsafe-math-optimizations,
# -cl-mad-enable, -cl-fast-relaxed-math, -cl-finite-math-only) are passed to the OpenCL
# compiler when building kernels in `make_python_function` below.
USE_FAST_MATH = True

10 

11 

# Module-wide default OpenCL context and command queue. They are populated by
# `init_globally` / `init_globally_with_context`, reset by `clear_global_ctx`,
# and used as fall-backs by `make_python_function` when no explicit
# context/queue is supplied.
_global_cl_ctx = None
_global_cl_queue = None


def get_global_cl_ctx():
    """Return the module-wide default OpenCL context, or ``None`` if unset."""
    return _global_cl_ctx


def get_global_cl_queue():
    """Return the module-wide default OpenCL command queue, or ``None`` if unset."""
    return _global_cl_queue

22 

23 

def init_globally(device_index=0):
    """Create a fresh OpenCL context and queue and install them as the
    module-wide defaults.

    Args:
        device_index: forwarded to :func:`pyopencl.create_some_context`
    """
    import pyopencl as cl

    global _global_cl_ctx, _global_cl_queue
    _global_cl_ctx = cl.create_some_context(device_index)
    _global_cl_queue = cl.CommandQueue(_global_cl_ctx)

30 

31 

def init_globally_with_context(opencl_ctx, opencl_queue):
    """Adopt an existing context/queue pair as the module-wide defaults.

    Args:
        opencl_ctx: a valid :class:`pyopencl.Context`
        opencl_queue: a valid :class:`pyopencl.CommandQueue`
    """
    global _global_cl_ctx, _global_cl_queue
    _global_cl_ctx = opencl_ctx
    _global_cl_queue = opencl_queue

37 

38 

def clear_global_ctx():
    """Drop the module-wide default OpenCL context and queue (reset to ``None``)."""
    global _global_cl_ctx, _global_cl_queue
    _global_cl_ctx = None
    _global_cl_queue = None

44 

45 

def make_python_function(kernel_function_node, opencl_queue, opencl_ctx, argument_dict=None, custom_backend=None):
    """
    Creates a **OpenCL** kernel function from an abstract syntax tree which
    was created for the ``target='gpu'`` e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
    or :func:`pystencils.gpucuda.created_indexed_cuda_kernel`

    Args:
        opencl_queue: a valid :class:`pyopencl.CommandQueue`; falls back to the module-wide
                      default when falsy
        opencl_ctx: a valid :class:`pyopencl.Context`; falls back to the module-wide
                    default when falsy
        kernel_function_node: the abstract syntax tree
        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
                       returned kernel functor.
        custom_backend: optional printer forwarded to :func:`generate_opencl`

    Returns:
        compiled kernel as Python function

    Raises:
        ValueError: if a float64 parameter is used on a device without double support
    """
    import pyopencl as cl

    # Fall back to the module-wide defaults when no explicit context/queue was given.
    if not opencl_ctx:
        opencl_ctx = _global_cl_ctx
    if not opencl_queue:
        opencl_queue = _global_cl_queue

    assert opencl_ctx, "No valid OpenCL context!\n" \
                       "Use `import pystencils.opencl.autoinit` if you want it to be automatically created"
    assert opencl_queue, "No valid OpenCL queue!\n" \
                         "Use `import pystencils.opencl.autoinit` if you want it to be automatically created"

    if argument_dict is None:
        argument_dict = {}

    # check if double precision is supported and required
    # (double_fp_config == 0 on a device means no double-precision capability)
    if any([d.double_fp_config == 0 for d in opencl_ctx.devices]):
        for param in kernel_function_node.get_parameters():
            # Pointer-like parameters carry the element type in base_type;
            # scalars carry it directly on the dtype.
            if param.symbol.dtype.base_type:
                if param.symbol.dtype.base_type.numpy_dtype == np.float64:
                    raise ValueError('OpenCL device does not support double precision')
            else:
                if param.symbol.dtype.numpy_dtype == np.float64:
                    raise ValueError('OpenCL device does not support double precision')

    # Changing of kernel name necessary since compilation with default name "kernel" is not possible (OpenCL keyword!)
    kernel_function_node.function_name = "opencl_" + kernel_function_node.function_name
    header_list = ['"opencl_stdint.h"'] + list(get_headers(kernel_function_node))
    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

    # Assemble the OpenCL C source: includes, prefix macros, then the generated kernel.
    code = includes + "\n"
    code += "#define FUNC_PREFIX __kernel\n"
    code += "#define RESTRICT restrict\n\n"
    code += str(generate_opencl(kernel_function_node, custom_backend=custom_backend))
    options = []
    if USE_FAST_MATH:
        # Aggressive FP flags; they trade IEEE strictness for speed.
        options.append("-cl-unsafe-math-optimizations")
        options.append("-cl-mad-enable")
        options.append("-cl-fast-relaxed-math")
        options.append("-cl-finite-math-only")
    options.append("-I")
    options.append(get_pystencils_include_path())
    mod = cl.Program(opencl_ctx, code).build(options=options)
    func = getattr(mod, kernel_function_node.function_name)

    parameters = kernel_function_node.get_parameters()

    # Per-kernel argument cache: avoids re-checking/rebuilding the argument list and
    # launch configuration when the same kwargs (by pointer/strides/shape or id) recur.
    cache = {}
    cache_values = []

    def wrapper(**kwargs):
        # Arrays are keyed by data pointer, strides and shape; everything else by id().
        key = hash(tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v))
                         for k, v in kwargs.items()))
        try:
            # Fast path: reuse the previously-built argument list and launch config.
            args, block_and_thread_numbers = cache[key]
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)
        except KeyError:
            # Slow path: merge fixed arguments with the call's kwargs and validate them.
            full_arguments = argument_dict.copy()
            full_arguments.update(kwargs)
            assert not any(isinstance(a, np.ndarray)
                           for a in full_arguments.values()), 'Calling a OpenCL kernel with a Numpy array!'
            assert not any('pycuda' in str(type(a))
                           for a in full_arguments.values()), 'Calling a OpenCL kernel with a PyCUDA array!'
            shape = _check_arguments(parameters, full_arguments)

            indexing = kernel_function_node.indexing
            block_and_thread_numbers = indexing.call_parameters(shape)
            block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
            # pyopencl expects the *global* size, so the per-dimension grid is
            # multiplied by the block size (unlike CUDA's block-count convention).
            block_and_thread_numbers['grid'] = tuple(int(b * g) for (b, g) in zip(block_and_thread_numbers['block'],
                                                                                  block_and_thread_numbers['grid']))

            args = _build_numpy_argument_list(parameters, full_arguments)
            # Unwrap buffer-like objects (e.g. pyopencl arrays) to their raw .data buffers.
            args = [a.data if hasattr(a, 'data') else a for a in args]
            cache[key] = (args, block_and_thread_numbers)
            cache_values.append(kwargs)  # keep objects alive such that ids remain unique
            func(opencl_queue, block_and_thread_numbers['grid'], block_and_thread_numbers['block'], *args)

    # Attach metadata on the raw wrapper, then expose it through KernelWrapper,
    # which is the common functor interface used across pystencils backends.
    wrapper.ast = kernel_function_node
    wrapper.parameters = kernel_function_node.get_parameters()
    wrapper = KernelWrapper(wrapper, parameters, kernel_function_node)
    return wrapper

142 return wrapper