1import numpy as np 

2from pystencils.data_types import BasicType 

3 

4 

5def aligned_empty(shape, byte_alignment=True, dtype=np.float64, byte_offset=0, order='C', align_inner_coordinate=True): 

6 """ 

7 Creates an aligned empty numpy array 

8 

9 Args: 

10 shape: size of the array 

11 byte_alignment: alignment in bytes, for the start address of the array holds (a % byte_alignment) == 0 

12 By default, use the maximum required by the CPU (or 512 bits if this cannot be detected). 

13 When 'cacheline' is specified, the size of a cache line is used. 

14 dtype: numpy data type 

15 byte_offset: offset in bytes for position that should be aligned i.e. (a+byte_offset) % byte_alignment == 0 

16 typically used to align first inner cell instead of ghost layer 

17 order: storage linearization order 

18 align_inner_coordinate: if True, the start of the innermost coordinate lines are aligned as well 

19 """ 

20 if byte_alignment is True or byte_alignment == 'cacheline': 

21 from pystencils.backends.simd_instruction_sets import (get_supported_instruction_sets, get_cacheline_size, 

22 get_vector_instruction_set) 

23 

24 type_name = BasicType.numpy_name_to_c(np.dtype(dtype).name) 

25 instruction_sets = get_supported_instruction_sets() 

26 if instruction_sets is None: 26 ↛ 27line 26 didn't jump to line 27, because the condition on line 26 was never true

27 byte_alignment = 64 

28 elif byte_alignment == 'cacheline': 

29 cacheline_sizes = [get_cacheline_size(is_name) for is_name in instruction_sets] 

30 if all([s is None for s in cacheline_sizes]): 30 ↛ 31line 30 didn't jump to line 31, because the condition on line 30 was never true

31 byte_alignment = max([get_vector_instruction_set(type_name, is_name)['width'] * np.dtype(dtype).itemsize 

32 for is_name in instruction_sets]) 

33 else: 

34 byte_alignment = max([s for s in cacheline_sizes if s is not None]) 

35 else: 

36 byte_alignment = max([get_vector_instruction_set(type_name, is_name)['width'] * np.dtype(dtype).itemsize 

37 for is_name in instruction_sets]) 

38 if (not align_inner_coordinate) or (not hasattr(shape, '__len__')): 

39 size = np.prod(shape) 

40 d = np.dtype(dtype) 

41 # 2 * byte_alignment instead of 1 * byte_alignment to have slack in the end such that 

42 # vectorized loops can access vector_width elements further and don't require a tail loop 

43 tmp = np.empty(size * d.itemsize + 2 * byte_alignment, dtype=np.uint8) 

44 address = tmp.__array_interface__['data'][0] 

45 offset = (byte_alignment - (address + byte_offset) % byte_alignment) % byte_alignment 

46 return tmp[offset:offset + size * d.itemsize].view(dtype=d).reshape(shape, order=order) 

47 else: 

48 if order == 'C': 48 ↛ 53line 48 didn't jump to line 53, because the condition on line 48 was never false

49 dim0_size = shape[-1] 

50 dim0 = -1 

51 dim1_size = np.prod(shape[:-1]) 

52 else: 

53 dim0_size = shape[0] 

54 dim0 = 0 

55 dim1_size = np.prod(shape[1:]) 

56 d = np.dtype(dtype) 

57 

58 assert byte_alignment >= d.itemsize and byte_alignment % d.itemsize == 0 

59 padding = (byte_alignment - ((dim0_size * d.itemsize) % byte_alignment)) % byte_alignment 

60 

61 size = dim1_size * padding + np.prod(shape) * d.itemsize 

62 tmp = aligned_empty(size, byte_alignment=byte_alignment, dtype=np.uint8, byte_offset=byte_offset) 

63 tmp = tmp.view(dtype=dtype) 

64 shape_in_bytes = [i for i in shape] 

65 shape_in_bytes[dim0] = dim0_size + padding // d.itemsize 

66 tmp = tmp.reshape(shape_in_bytes, order=order) 

67 if tmp.flags['C_CONTIGUOUS']: 67 ↛ 70line 67 didn't jump to line 70, because the condition on line 67 was never false

68 tmp = tmp[..., :shape[-1]] 

69 else: 

70 tmp = tmp[:shape[0], ...] 

71 

72 return tmp 

73 

74 

75def aligned_zeros(shape, byte_alignment=True, dtype=float, byte_offset=0, order='C', align_inner_coordinate=True): 

76 arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset, 

77 order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate) 

78 x = np.zeros((), arr.dtype) 

79 arr[...] = x 

80 return arr 

81 

82 

83def aligned_ones(shape, byte_alignment=True, dtype=float, byte_offset=0, order='C', align_inner_coordinate=True): 

84 arr = aligned_empty(shape, dtype=dtype, byte_offset=byte_offset, 

85 order=order, byte_alignment=byte_alignment, align_inner_coordinate=align_inner_coordinate) 

86 x = np.ones((), arr.dtype) 

87 arr[...] = x 

88 return arr