1import numpy as np 

2 

3from pystencils.backends.cbackend import get_headers 

4from pystencils.backends.cuda_backend import generate_cuda 

5from pystencils.data_types import StructType 

6from pystencils.field import FieldType 

7from pystencils.gpucuda.texture_utils import ndarray_to_tex 

8from pystencils.include import get_pycuda_include_path, get_pystencils_include_path 

9from pystencils.interpolation_astnodes import InterpolatorAccess, TextureCachedField 

10from pystencils.kernel_wrapper import KernelWrapper 

11from pystencils.kernelparameters import FieldPointerSymbol 

12 

13USE_FAST_MATH = True 

14 

15 

def get_cubic_interpolation_include_paths():
    """Return the include directories of the bundled CubicInterpolationCUDA submodule.

    The returned list contains the submodule's ``code`` directory and its
    ``code/internal`` subdirectory, both located next to this file.
    """
    from os.path import join, dirname
    code_dir = join(dirname(__file__), "CubicInterpolationCUDA", "code")
    return [code_dir, join(code_dir, "internal")]

21 

22 

def make_python_function(kernel_function_node, argument_dict=None, custom_backend=None):
    """
    Creates a kernel function from an abstract syntax tree which
    was created e.g. by :func:`pystencils.gpucuda.create_cuda_kernel`
    or :func:`pystencils.gpucuda.created_indexed_cuda_kernel`

    Args:
        kernel_function_node: the abstract syntax tree
        argument_dict: parameters passed here are already fixed. Remaining parameters have to be passed to the
                       returned kernel functor.
        custom_backend: optional custom code printer passed through to :func:`generate_cuda`

    Returns:
        compiled kernel as Python function
    """
    import pycuda.autoinit  # NOQA
    from pycuda.compiler import SourceModule

    if argument_dict is None:
        argument_dict = {}

    header_list = ['<cstdint>'] + list(get_headers(kernel_function_node))
    includes = "\n".join(["#include %s" % (include_file,) for include_file in header_list])

    code = includes + "\n"
    code += "#define FUNC_PREFIX __global__\n"
    code += "#define RESTRICT __restrict__\n\n"
    code += str(generate_cuda(kernel_function_node, custom_backend=custom_backend))
    # collect all texture-backed interpolators referenced by the kernel
    textures = set(d.interpolator for d in kernel_function_node.atoms(
        InterpolatorAccess) if isinstance(d.interpolator, TextureCachedField))

    nvcc_options = ["-w", "-std=c++11", "-Wno-deprecated-gpu-targets"]
    if USE_FAST_MATH:
        nvcc_options.append("-use_fast_math")

    # Code for CubicInterpolationCUDA
    from pystencils.interpolation_astnodes import InterpolationMode
    from os.path import join, dirname, isdir

    if any(t.interpolation_mode == InterpolationMode.CUBIC_SPLINE for t in textures):
        # BUG FIX: the original passed a tuple to join() and the message as a second
        # argument to isdir() — both raised TypeError instead of the intended assertion.
        assert isdir(join(dirname(__file__), "CubicInterpolationCUDA", "code")), \
            ("Submodule CubicInterpolationCUDA does not exist.\n"
             + "Clone https://github.com/theHamsta/CubicInterpolationCUDA into pystencils.gpucuda")
        # reuse the module-level helper so the include paths are defined in one place
        nvcc_options += ["-I" + include_path for include_path in get_cubic_interpolation_include_paths()]

        needed_dims = set(t.field.spatial_dimensions for t in textures
                          if t.interpolation_mode == InterpolationMode.CUBIC_SPLINE)
        for i in needed_dims:
            code = 'extern "C++" {\n#include "cubicTex%iD.cu"\n}\n' % i + code

    mod = SourceModule(code, options=nvcc_options, include_dirs=[
        get_pystencils_include_path(), get_pycuda_include_path()])
    func = mod.get_function(kernel_function_node.function_name)

    parameters = kernel_function_node.get_parameters()

    # per-call-signature cache: maps a hash of the passed arrays/objects to the
    # prepared argument list and launch configuration
    cache = {}
    cache_values = []

    def wrapper(**kwargs):
        key = hash(tuple((k, v.ctypes.data, v.strides, v.shape) if isinstance(v, np.ndarray) else (k, id(v))
                         for k, v in kwargs.items()))
        try:
            args, block_and_thread_numbers = cache[key]
            func(*args, **block_and_thread_numbers)
        except KeyError:
            full_arguments = argument_dict.copy()
            full_arguments.update(kwargs)
            shape = _check_arguments(parameters, full_arguments)

            indexing = kernel_function_node.indexing
            block_and_thread_numbers = indexing.call_parameters(shape)
            block_and_thread_numbers['block'] = tuple(int(i) for i in block_and_thread_numbers['block'])
            block_and_thread_numbers['grid'] = tuple(int(i) for i in block_and_thread_numbers['grid'])

            # TODO: use texture objects:
            # https://devblogs.nvidia.com/cuda-pro-tip-kepler-texture-objects-improve-performance-and-flexibility/
            for tex in textures:
                tex_ref = mod.get_texref(str(tex))
                ndarray_to_tex(tex_ref, full_arguments[tex.field.name], tex.address_mode,
                               tex.filter_mode, tex.use_normalized_coordinates, tex.read_as_integer)
            args = _build_numpy_argument_list(parameters, full_arguments)
            cache[key] = (args, block_and_thread_numbers)
            cache_values.append(kwargs)  # keep objects alive such that ids remain unique
            func(*args, **block_and_thread_numbers)
    # import pycuda.driver as cuda
    # cuda.Context.synchronize() # useful for debugging, to get errors right after kernel was called
    ast = kernel_function_node
    wrapper = KernelWrapper(wrapper, parameters, ast)
    wrapper.num_regs = func.num_regs
    return wrapper

115 

116 

117def _build_numpy_argument_list(parameters, argument_dict): 

118 argument_dict = {k: v for k, v in argument_dict.items()} 

119 result = [] 

120 

121 for param in parameters: 

122 if param.is_field_pointer: 

123 array = argument_dict[param.field_name] 

124 actual_type = array.dtype 

125 expected_type = param.fields[0].dtype.numpy_dtype 

126 if expected_type != actual_type: 

127 raise ValueError("Data type mismatch for field '%s'. Expected '%s' got '%s'." % 

128 (param.field_name, expected_type, actual_type)) 

129 result.append(array) 

130 elif param.is_field_stride: 

131 cast_to_dtype = param.symbol.dtype.numpy_dtype.type 

132 array = argument_dict[param.field_name] 

133 stride = cast_to_dtype(array.strides[param.symbol.coordinate] // array.dtype.itemsize) 

134 result.append(stride) 

135 elif param.is_field_shape: 

136 cast_to_dtype = param.symbol.dtype.numpy_dtype.type 

137 array = argument_dict[param.field_name] 

138 result.append(cast_to_dtype(array.shape[param.symbol.coordinate])) 

139 else: 

140 expected_type = param.symbol.dtype.numpy_dtype 

141 result.append(expected_type.type(argument_dict[param.symbol.name])) 

142 

143 assert len(result) == len(parameters) 

144 return result 

145 

146 

def _check_arguments(parameter_specification, argument_dict):
    """
    Checks if parameters passed to kernel match the description in the AST function node.
    If not it raises a ValueError, on success it returns the array shape that determines the CUDA blocks and threads

    Args:
        parameter_specification: kernel parameters, e.g. from ``kernel_function_node.get_parameters()``
        argument_dict: mapping of field/parameter names to the values passed by the caller

    Returns:
        the common spatial shape of the index arrays if any are present, otherwise the
        common spatial shape of the generic field arrays

    Raises:
        KeyError: if a required field array is missing from *argument_dict*
        ValueError: if an array's shape/strides mismatch the field description, or arrays disagree in size
    """
    argument_dict = {k: v for k, v in argument_dict.items()}
    array_shapes = set()
    index_arr_shapes = set()

    for param in parameter_specification:
        if isinstance(param.symbol, FieldPointerSymbol):
            symbolic_field = param.fields[0]

            try:
                field_arr = argument_dict[symbolic_field.name]
            except KeyError:
                raise KeyError("Missing field parameter for kernel call " + str(symbolic_field))

            if symbolic_field.has_fixed_shape:
                symbolic_field_shape = tuple(int(i) for i in symbolic_field.shape)
                symbolic_field_strides = tuple(int(i) * field_arr.dtype.itemsize for i in symbolic_field.strides)
                if isinstance(symbolic_field.dtype, StructType):
                    # last dimension of a struct field addresses struct members; the numpy
                    # array of structs does not carry it as a separate axis
                    symbolic_field_shape = symbolic_field_shape[:-1]
                    symbolic_field_strides = symbolic_field_strides[:-1]
                # BUG FIX: report the trimmed expected shape actually compared, not symbolic_field.shape
                if symbolic_field_shape != field_arr.shape:
                    raise ValueError("Passed array '%s' has shape %s which does not match expected shape %s" %
                                     (symbolic_field.name, str(field_arr.shape), str(symbolic_field_shape)))
                if symbolic_field_strides != field_arr.strides:
                    raise ValueError("Passed array '%s' has strides %s which does not match expected strides %s" %
                                     (symbolic_field.name, str(field_arr.strides), str(symbolic_field_strides)))

            if FieldType.is_indexed(symbolic_field):
                index_arr_shapes.add(field_arr.shape[:symbolic_field.spatial_dimensions])
            elif FieldType.is_generic(symbolic_field):
                array_shapes.add(field_arr.shape[:symbolic_field.spatial_dimensions])

    if len(array_shapes) > 1:
        raise ValueError("All passed arrays have to have the same size " + str(array_shapes))
    if len(index_arr_shapes) > 1:
        # BUG FIX: the message previously printed array_shapes instead of the mismatching index_arr_shapes
        raise ValueError("All passed index arrays have to have the same size " + str(index_arr_shapes))

    # index arrays determine the launch size for indexed kernels, otherwise the generic arrays do
    if len(index_arr_shapes) > 0:
        return list(index_arr_shapes)[0]
    else:
        return list(array_shapes)[0]