1from typing import List, Union 

2 

3import sympy as sp 

4 

5import pystencils.astnodes as ast 

6from pystencils.assignment import Assignment 

7from pystencils.astnodes import Block, KernelFunction, LoopOverCoordinate, SympyAssignment 

8from pystencils.cpu.cpujit import make_python_function 

9from pystencils.data_types import BasicType, StructType, TypedSymbol, create_type 

10from pystencils.field import Field, FieldType 

11from pystencils.transformations import ( 

12 add_types, filtered_tree_iteration, get_base_buffer_index, get_optimal_loop_ordering, 

13 implement_interpolations, make_loop_over_domain, move_constants_before_loop, 

14 parse_base_pointer_info, resolve_buffer_accesses, resolve_field_accesses, split_inner_loop) 

15 

16AssignmentOrAstNodeList = List[Union[Assignment, ast.Node]] 

17 

18 

19def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "kernel", type_info='double', 

20 split_groups=(), iteration_slice=None, ghost_layers=None, 

21 skip_independence_check=False) -> KernelFunction: 

22 """Creates an abstract syntax tree for a kernel function, by taking a list of update rules. 

23 

24 Loops are created according to the field accesses in the equations. 

25 

26 Args: 

27 assignments: list of sympy equations, containing accesses to :class:`pystencils.field.Field`. 

28 Defining the update rules of the kernel 

29 function_name: name of the generated function - only important if generated code is written out 

30 type_info: a map from symbol name to a C type specifier. If not specified all symbols are assumed to 

31 be of type 'double' except symbols which occur on the left hand side of equations where the 

32 right hand side is a sympy Boolean which are assumed to be 'bool' . 

33 split_groups: Specification on how to split up inner loop into multiple loops. For details see 

34 transformation :func:`pystencils.transformation.split_inner_loop` 

35 iteration_slice: if not None, iteration is done only over this slice of the field 

36 ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers 

37 that should be excluded from the iteration. 

38 if None, the number of ghost layers is determined automatically and assumed to be equal for a 

39 all dimensions 

40 skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for 

41 periodicity kernel, that access the field outside the iteration bounds. Use with care! 

42 

43 Returns: 

44 AST node representing a function, that can be printed as C or CUDA code 

45 """ 

46 def type_symbol(term): 

47 if isinstance(term, Field.Access) or isinstance(term, TypedSymbol): 

48 return term 

49 elif isinstance(term, sp.Symbol): 

50 if isinstance(type_info, str) or not hasattr(type_info, '__getitem__'): 

51 return TypedSymbol(term.name, create_type(type_info)) 

52 else: 

53 return TypedSymbol(term.name, type_info[term.name]) 

54 else: 

55 raise ValueError("Term has to be field access or symbol") 

56 

57 fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check) 

58 all_fields = fields_read.union(fields_written) 

59 read_only_fields = set([f.name for f in fields_read - fields_written]) 

60 

61 buffers = set([f for f in all_fields if FieldType.is_buffer(f)]) 

62 fields_without_buffers = all_fields - buffers 

63 

64 body = ast.Block(assignments) 

65 loop_order = get_optimal_loop_ordering(fields_without_buffers) 

66 loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice, 

67 ghost_layers=ghost_layers, loop_order=loop_order) 

68 ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function, 

69 ghost_layers=ghost_layer_info, function_name=function_name, assignments=assignments) 

70 implement_interpolations(body) 

71 

72 if split_groups: 72 ↛ 73line 72 didn't jump to line 73, because the condition on line 72 was never true

73 typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups] 

74 split_inner_loop(ast_node, typed_split_groups) 

75 

76 base_pointer_spec = [['spatialInner0'], ['spatialInner1']] if len(loop_order) >= 2 else [['spatialInner0']] 

77 base_pointer_info = {field.name: parse_base_pointer_info(base_pointer_spec, loop_order, 

78 field.spatial_dimensions, field.index_dimensions) 

79 for field in fields_without_buffers} 

80 

81 buffer_base_pointer_info = {field.name: parse_base_pointer_info([['spatialInner0']], [0], 

82 field.spatial_dimensions, field.index_dimensions) 

83 for field in buffers} 

84 base_pointer_info.update(buffer_base_pointer_info) 

85 

86 if any(FieldType.is_buffer(f) for f in all_fields): 86 ↛ 87line 86 didn't jump to line 87, because the condition on line 86 was never true

87 resolve_buffer_accesses(ast_node, get_base_buffer_index(ast_node), read_only_fields) 

88 resolve_field_accesses(ast_node, read_only_fields, field_to_base_pointer_info=base_pointer_info) 

89 move_constants_before_loop(ast_node) 

90 return ast_node 

91 

92 

93def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, function_name="kernel", 

94 type_info=None, coordinate_names=('x', 'y', 'z')) -> KernelFunction: 

95 """ 

96 Similar to :func:`create_kernel`, but here not all cells of a field are updated but only cells with 

97 coordinates which are stored in an index field. This traversal method can e.g. be used for boundary handling. 

98 

99 The coordinates are stored in a separate index_field, which is a one dimensional array with struct data type. 

100 This struct has to contain fields named 'x', 'y' and for 3D fields ('z'). These names are configurable with the 

101 'coordinate_names' parameter. The struct can have also other fields that can be read and written in the kernel, for 

102 example boundary parameters. 

103 

104 Args: 

105 assignments: list of assignments 

106 index_fields: list of index fields, i.e. 1D fields with struct data type 

107 type_info: see documentation of :func:`create_kernel` 

108 function_name: see documentation of :func:`create_kernel` 

109 coordinate_names: name of the coordinate fields in the struct data type 

110 """ 

111 fields_read, fields_written, assignments = add_types(assignments, type_info, check_independence_condition=False) 

112 all_fields = fields_read.union(fields_written) 

113 

114 for index_field in index_fields: 

115 index_field.field_type = FieldType.INDEXED 

116 assert FieldType.is_indexed(index_field) 

117 assert index_field.spatial_dimensions == 1, "Index fields have to be 1D" 

118 

119 non_index_fields = [f for f in all_fields if f not in index_fields] 

120 spatial_coordinates = {f.spatial_dimensions for f in non_index_fields} 

121 assert len(spatial_coordinates) == 1, "Non-index fields do not have the same number of spatial coordinates" 

122 spatial_coordinates = list(spatial_coordinates)[0] 

123 

124 def get_coordinate_symbol_assignment(name): 

125 for idx_field in index_fields: 

126 assert isinstance(idx_field.dtype, StructType), "Index fields have to have a struct data type" 

127 data_type = idx_field.dtype 

128 if data_type.has_element(name): 

129 rhs = idx_field[0](name) 

130 lhs = TypedSymbol(name, BasicType(data_type.get_element_type(name))) 

131 return SympyAssignment(lhs, rhs) 

132 raise ValueError(f"Index {name} not found in any of the passed index fields") 

133 

134 coordinate_symbol_assignments = [get_coordinate_symbol_assignment(n) 

135 for n in coordinate_names[:spatial_coordinates]] 

136 coordinate_typed_symbols = [eq.lhs for eq in coordinate_symbol_assignments] 

137 assignments = coordinate_symbol_assignments + assignments 

138 

139 # make 1D loop over index fields 

140 loop_body = Block([]) 

141 loop_node = LoopOverCoordinate(loop_body, coordinate_to_loop_over=0, start=0, stop=index_fields[0].shape[0]) 

142 

143 implement_interpolations(loop_node) 

144 

145 for assignment in assignments: 

146 loop_body.append(assignment) 

147 

148 function_body = Block([loop_node]) 

149 ast_node = KernelFunction(function_body, "cpu", "c", make_python_function, 

150 ghost_layers=None, function_name=function_name, assignments=assignments) 

151 

152 fixed_coordinate_mapping = {f.name: coordinate_typed_symbols for f in non_index_fields} 

153 

154 read_only_fields = set([f.name for f in fields_read - fields_written]) 

155 resolve_field_accesses(ast_node, read_only_fields, field_to_fixed_coordinates=fixed_coordinate_mapping) 

156 move_constants_before_loop(ast_node) 

157 return ast_node 

158 

159 

160def add_openmp(ast_node, schedule="static", num_threads=True, collapse=None, assume_single_outer_loop=True): 

161 """Parallelize the outer loop with OpenMP. 

162 

163 Args: 

164 ast_node: abstract syntax tree created e.g. by :func:`create_kernel` 

165 schedule: OpenMP scheduling policy e.g. 'static' or 'dynamic' 

166 num_threads: explicitly specify number of threads 

167 collapse: number of nested loops to include in parallel region (see OpenMP collapse) 

168 assume_single_outer_loop: if True an exception is raised if multiple outer loops are detected for all but 

169 optimized staggered kernels the single-outer-loop assumption should be true 

170 """ 

171 if not num_threads: 171 ↛ 172line 171 didn't jump to line 172, because the condition on line 171 was never true

172 return 

173 

174 assert type(ast_node) is ast.KernelFunction 

175 body = ast_node.body 

176 threads_clause = "" if num_threads and isinstance(num_threads, bool) else f" num_threads({num_threads})" 

177 wrapper_block = ast.PragmaBlock('#pragma omp parallel' + threads_clause, body.take_child_nodes()) 

178 body.append(wrapper_block) 

179 

180 outer_loops = [l for l in filtered_tree_iteration(body, LoopOverCoordinate, stop_type=SympyAssignment) 

181 if l.is_outermost_loop] 

182 assert outer_loops, "No outer loop found" 

183 if assume_single_outer_loop and len(outer_loops) > 1: 183 ↛ 184line 183 didn't jump to line 184, because the condition on line 183 was never true

184 raise ValueError("More than one outer loop found, only one outer loop expected") 

185 

186 for loop_to_parallelize in outer_loops: 

187 try: 

188 loop_range = int(loop_to_parallelize.stop - loop_to_parallelize.start) 

189 except TypeError: 

190 loop_range = None 

191 

192 if loop_range is not None and loop_range < num_threads and not collapse: 192 ↛ 193line 192 didn't jump to line 193, because the condition on line 192 was never true

193 contained_loops = [l for l in loop_to_parallelize.body.args if isinstance(l, LoopOverCoordinate)] 

194 if len(contained_loops) == 1: 

195 contained_loop = contained_loops[0] 

196 try: 

197 contained_loop_range = int(contained_loop.stop - contained_loop.start) 

198 if contained_loop_range > loop_range: 

199 loop_to_parallelize = contained_loop 

200 except TypeError: 

201 pass 

202 

203 prefix = f"#pragma omp for schedule({schedule})" 

204 if collapse: 204 ↛ 205line 204 didn't jump to line 205, because the condition on line 204 was never true

205 prefix += " collapse(%d)" % (collapse, ) 

206 loop_to_parallelize.prefix_lines.append(prefix)