import functools
import itertools
from types import MappingProxyType

import sympy as sp

from pystencils.assignment import Assignment
from pystencils.astnodes import Block, Conditional, LoopOverCoordinate, SympyAssignment
from pystencils.cpu.vectorization import vectorize
from pystencils.field import Field, FieldType
from pystencils.gpucuda.indexing import indexing_creator_from_params
from pystencils.simp.assignment_collection import AssignmentCollection
from pystencils.stencil import direction_string_to_offset, inverse_direction_string
from pystencils.transformations import (
    loop_blocking, move_constants_before_loop, remove_conditionals_in_staggered_kernel)


def create_kernel(assignments,
                  target='cpu',
                  data_type="double",
                  iteration_slice=None,
                  ghost_layers=None,
                  skip_independence_check=False,
                  cpu_openmp=False,
                  cpu_vectorize_info=None,
                  cpu_blocking=None,
                  omp_single_loop=True,
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({}),
                  use_textures_for_interpolation=True,
                  cpu_prepend_optimizations=[],
                  use_auto_for_assignments=False,
                  opencl_queue=None,
                  opencl_ctx=None):
35 """ 

36 Creates abstract syntax tree (AST) of kernel, using a list of update equations. 

37 

38 Args: 

39 assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection` 

40 target: 'cpu', 'llvm', 'gpu' or 'opencl' 

41 data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name 

42 to type 

43 iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \ 

44 part of the field is iterated over 

45 ghost_layers: a single integer specifies the ghost layer count at all borders, can also be a sequence of 

46 pairs ``[(x_lower_gl, x_upper_gl), .... ]``. These layers are excluded from the iteration. 

47 If left to default, the number of ghost layers is determined automatically. 

48 skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for 

49 periodicity kernel, that access the field outside the iteration bounds. Use with care! 

50 cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP 

51 omp_single_loop: if OpenMP is active: whether multiple outer loops are permitted 

52 cpu_vectorize_info: a dictionary with keys, 'vector_instruction_set', 'assume_aligned' and 'nontemporal' 

53 for documentation of these parameters see vectorize function. Example: 

54 '{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal':True}' 

55 cpu_blocking: a tuple of block sizes or None if no blocking should be applied 

56 gpu_indexing: either 'block' or 'line' , or custom indexing class, see `AbstractIndexing` 

57 gpu_indexing_params: dict with indexing parameters (constructor parameters of indexing class) 

58 e.g. for 'block' one can specify '{'block_size': (20, 20, 10) }' 

59 cpu_prepend_optimizations: list of extra optimizations to perform first on the AST 

60 

61 Returns: 

62 abstract syntax tree (AST) object, that can either be printed as source code with `show_code` or 

63 can be compiled with through its 'compile()' member 

64 

65 Example: 

66 >>> import pystencils as ps 

67 >>> import numpy as np 

68 >>> s, d = ps.fields('s, d: [2D]') 

69 >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0]) 

70 >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True) 

71 >>> kernel = ast.compile() 

72 >>> d_arr = np.zeros([5, 5]) 

73 >>> kernel(d=d_arr, s=np.ones([5, 5])) 

74 >>> d_arr 

75 array([[0., 0., 0., 0., 0.], 

76 [0., 4., 4., 4., 0.], 

77 [0., 4., 4., 4., 0.], 

78 [0., 4., 4., 4., 0.], 

79 [0., 0., 0., 0., 0.]]) 
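
        The CPU-specific options can be combined. As a rough sketch (not a doctest, since the available
        vector instruction sets depend on the local machine), an OpenMP-parallel, blocked and vectorized
        kernel could be requested like this::

            ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=4, cpu_blocking=(32, 16),
                                   cpu_vectorize_info={'instruction_set': 'avx',
                                                       'assume_aligned': False,
                                                       'nontemporal': False})
            kernel = ast.compile()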

80 """ 

    # ---- Normalizing parameters
    if isinstance(assignments, Assignment):
        assignments = [assignments]
    assert assignments, "Assignments must not be empty!"
    split_groups = ()
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments

    # ---- Creating ast
    if target == 'cpu':
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
        for optimization in cpu_prepend_optimizations:
            optimization(ast)
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse, assume_single_outer_loop=omp_single_loop)
        if cpu_vectorize_info:
            if cpu_vectorize_info is True:
                vectorize(ast)
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
                if cpu_openmp and cpu_blocking and 'nontemporal' in cpu_vectorize_info and \
                        cpu_vectorize_info['nontemporal'] and 'cachelineZero' in ast.instruction_set:
                    # This condition is stricter than it needs to be: if blocks along the fastest axis start on a
                    # cache line boundary, it's okay. But we cannot determine that here.
                    # We don't need to disallow OpenMP collapsing because it is never applied to the inner loop.
                    raise ValueError("Blocking cannot be combined with cacheline-zeroing")
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
    elif target == 'llvm':
        from pystencils.llvm import create_kernel
        ast = create_kernel(assignments, type_info=data_type, split_groups=split_groups,
                            iteration_slice=iteration_slice, ghost_layers=ghost_layers)
    elif target == 'gpu' or target == 'opencl':
        from pystencils.gpucuda import create_cuda_kernel
        ast = create_cuda_kernel(assignments, type_info=data_type,
                                 indexing_creator=indexing_creator_from_params(gpu_indexing, gpu_indexing_params),
                                 iteration_slice=iteration_slice, ghost_layers=ghost_layers,
                                 skip_independence_check=skip_independence_check,
                                 use_textures_for_interpolation=use_textures_for_interpolation)
        if target == 'opencl':
            from pystencils.opencl.opencljit import make_python_function
            ast._backend = 'opencl'
            ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
            ast._target = 'opencl'
            ast._backend = 'opencl'
        return ast
    else:
        raise ValueError(f"Unknown target {target}. Has to be one of 'cpu', 'gpu', 'opencl' or 'llvm'")

    if use_auto_for_assignments:
        for a in ast.atoms(SympyAssignment):
            a.use_auto = True

    return ast


def create_indexed_kernel(assignments,
                          index_fields,
                          target='cpu',
                          data_type="double",
                          coordinate_names=('x', 'y', 'z'),
                          cpu_openmp=True,
                          gpu_indexing='block',
                          gpu_indexing_params=MappingProxyType({}),
                          use_textures_for_interpolation=True,
                          opencl_queue=None,
                          opencl_ctx=None):
157 """ 

158 Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with 

159 coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling. 

160 

161 The coordinates are stored in a separated index_field, which is a one dimensional array with struct data type. 

162 This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the 

163 'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for 

164 example boundary parameters. 

165 

166 index_fields: list of index fields, i.e. 1D fields with struct data type 

167 coordinate_names: name of the coordinate fields in the struct data type 

168 

169 Example: 

170 >>> import pystencils as ps 

171 >>> import numpy as np 

172 >>> 

173 >>> # Index field stores the indices of the cell to visit together with optional values 

174 >>> index_arr_dtype = np.dtype([('x', np.int32), ('y', np.int32), ('val', np.double)]) 

175 >>> index_arr = np.array([(1, 1, 0.1), (2, 2, 0.2), (3, 3, 0.3)], dtype=index_arr_dtype) 

176 >>> idx_field = ps.fields(idx=index_arr) 

177 >>> 

178 >>> # Additional values stored in index field can be accessed in the kernel as well 

179 >>> s, d = ps.fields('s, d: [2D]') 

180 >>> assignment = ps.Assignment(d[0,0], 2 * s[0, 1] + 2 * s[1, 0] + idx_field('val')) 

181 >>> ast = create_indexed_kernel(assignment, [idx_field], coordinate_names=('x', 'y')) 

182 >>> kernel = ast.compile() 

183 >>> d_arr = np.zeros([5, 5]) 

184 >>> kernel(s=np.ones([5, 5]), d=d_arr, idx=index_arr) 

185 >>> d_arr 

186 array([[0. , 0. , 0. , 0. , 0. ], 

187 [0. , 4.1, 0. , 0. , 0. ], 

188 [0. , 0. , 4.2, 0. , 0. ], 

189 [0. , 0. , 0. , 4.3, 0. ], 

190 [0. , 0. , 0. , 0. , 0. ]]) 

191 """ 

    if isinstance(assignments, Assignment):
        assignments = [assignments]
    elif isinstance(assignments, AssignmentCollection):
        assignments = assignments.all_assignments
    if target == 'cpu':
        from pystencils.cpu import create_indexed_kernel
        from pystencils.cpu import add_openmp
        ast = create_indexed_kernel(assignments, index_fields=index_fields, type_info=data_type,
                                    coordinate_names=coordinate_names)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp)
        return ast
    elif target == 'llvm':
        raise NotImplementedError("Indexed kernels are not yet supported in LLVM backend")
    elif target == 'gpu' or target == 'opencl':
        from pystencils.gpucuda import created_indexed_cuda_kernel
        idx_creator = indexing_creator_from_params(gpu_indexing, gpu_indexing_params)
        ast = created_indexed_cuda_kernel(assignments,
                                          index_fields,
                                          type_info=data_type,
                                          coordinate_names=coordinate_names,
                                          indexing_creator=idx_creator,
                                          use_textures_for_interpolation=use_textures_for_interpolation)
        if target == 'opencl':
            from pystencils.opencl.opencljit import make_python_function
            ast._backend = 'opencl'
            ast.compile = functools.partial(make_python_function, ast, opencl_queue, opencl_ctx)
            ast._target = 'opencl'
            ast._backend = 'opencl'
        return ast
    else:
        raise ValueError(f"Unknown target {target}. Has to be one of 'cpu', 'gpu' or 'opencl'")


def create_staggered_kernel(assignments, target='cpu', gpu_exclusive_conditions=False, **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered fields are processed specially, while subexpressions and assignments
                     to regular fields are passed through to `create_kernel`. Multiple different staggered fields
                     can be used, but they all need to use the same stencil (i.e. the same number of staggered
                     points) and shape.
        target: 'cpu', 'llvm' or 'gpu'
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
        kwargs: passed directly to `create_kernel`; the iteration_slice, ghost_layers and omp_single_loop
                parameters are not allowed

    Returns:
        AST, see `create_kernel`
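
    Example:
        A minimal sketch rather than a doctest: it assumes that a staggered field can be created by
        passing ``field_type=ps.FieldType.STAGGERED`` to ``ps.fields`` and that its values are written
        via ``Field.staggered_access``, which maps direction strings such as 'W' and 'S' to the first
        index coordinate::

            import pystencils as ps

            c = ps.fields('c: double[2D]')                                        # cell-centered field
            j = ps.fields('j(2): double[2D]', field_type=ps.FieldType.STAGGERED)  # staggered flux field
            assignments = [ps.Assignment(j.staggered_access('W'), c[0, 0] - c[-1, 0]),
                           ps.Assignment(j.staggered_access('S'), c[0, 0] - c[0, -1])]
            ast = ps.create_staggered_kernel(assignments, target='cpu')
            kernel = ast.compile()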

247 """ 

248 assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs and 'omp_single_loop' not in kwargs 

249 

    if isinstance(assignments, AssignmentCollection):
        subexpressions = assignments.subexpressions + [a for a in assignments.main_assignments
                                                       if not hasattr(a, 'lhs')
                                                       or type(a.lhs) is not Field.Access
                                                       or not FieldType.is_staggered(a.lhs.field)]
        assignments = [a for a in assignments.main_assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
                       and FieldType.is_staggered(a.lhs.field)]
    else:
        subexpressions = [a for a in assignments if not hasattr(a, 'lhs')
                          or type(a.lhs) is not Field.Access
                          or not FieldType.is_staggered(a.lhs.field)]
        assignments = [a for a in assignments if hasattr(a, 'lhs')
                       and type(a.lhs) is Field.Access
                       and FieldType.is_staggered(a.lhs.field)]
    if len(set([tuple(a.lhs.field.staggered_stencil) for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same stencil")
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError("All assignments need to be made to staggered fields with the same shape")

    staggered_field = assignments[0].lhs.field
    stencil = staggered_field.staggered_stencil
    dim = staggered_field.spatial_dimensions
    shape = staggered_field.shape

    counters = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)]

    final_assignments = []

    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
    for direction in stencil:
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
    ghost_layers = [[0, 0] for d in range(dim)]
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
                ghost_layers[d][1] = 1
            elif s == -1:
                ghost_layers[d][0] = 1

    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
            if e in common_exclusions:
                continue
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
                    conditions.append(counters[i] < shape[i] - 1)
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

    if gpu_exclusive_conditions:
        # build a single nested conditional: the bulk of the domain is handled in the true branch,
        # the outer layers in the chained else branches
        outer_assignment = None
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
            for combination in itertools.combinations(conditions.values(), num_conditions):
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs, assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination), Block([assignment]), outer_assignment)

        inner_assignment = []
        for assignment in assignments:
            direction = stencil[assignment.lhs.index[0]]
            inner_assignment.append(SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(sp.And(*[condition(d) for d in stencil]),
                                       Block(inner_assignment), outer_assignment)
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

        if target == 'cpu':
            from pystencils.cpu import create_kernel as create_kernel_cpu
            ast = create_kernel_cpu(final_assignments, ghost_layers=ghost_layers, omp_single_loop=False, **kwargs)
        else:
            ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)
        return ast

    # default path: one conditional per staggered assignment, skipping staggered points between ghost cells
    for assignment in assignments:
        direction = stencil[assignment.lhs.index[0]]
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
        last_conditional = Conditional(condition(direction), Block(sp_assignments))
        final_assignments.append(last_conditional)

    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    prepend_optimizations = [lambda ast: remove_conditionals_in_staggered_kernel(ast, remove_start_conditional),
                             move_constants_before_loop]
    ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, omp_single_loop=False,
                        cpu_prepend_optimizations=prepend_optimizations, **kwargs)
    return ast

354 return ast