1import math 

2import os 

3import platform 

4from ctypes import CDLL 

5 

6from pystencils.backends.x86_instruction_sets import get_vector_instruction_set_x86 

7from pystencils.backends.arm_instruction_sets import get_vector_instruction_set_arm 

8from pystencils.backends.ppc_instruction_sets import get_vector_instruction_set_ppc 

9 

10 

def get_vector_instruction_set(data_type='double', instruction_set='avx'):
    """Look up the vector instruction set description for an architecture.

    The request is forwarded to the architecture-specific backend chosen
    from the instruction set name:

    * ``'neon'`` or any name starting with ``'sve'`` -> ARM backend
    * ``'vsx'`` -> PowerPC backend
    * anything else -> x86 backend

    Args:
        data_type: forwarded unchanged to the backend (default ``'double'``).
        instruction_set: name of the instruction set (default ``'avx'``).

    Returns:
        Whatever the selected backend returns for the given arguments.
    """
    targets_arm = instruction_set == 'neon' or instruction_set.startswith('sve')
    if targets_arm:
        backend = get_vector_instruction_set_arm
    elif instruction_set == 'vsx':
        backend = get_vector_instruction_set_ppc
    else:
        backend = get_vector_instruction_set_x86
    return backend(data_type, instruction_set)

18 

19 

# Memoized result of get_supported_instruction_sets(); None until it is filled.
_cache = None
# Memoized result of get_cacheline_size(); None until it has been measured.
_cachelinesize = None

22 

23 

def get_supported_instruction_sets():
    """List of supported instruction sets on current hardware, or None if query failed.

    Sources are consulted in this order:

    1. the module-level cache filled by an earlier call,
    2. the ``PYSTENCILS_SIMD`` environment variable (comma-separated names,
       returned as-is and not cached),
    3. a fixed answer of ``['neon']`` on Darwin/arm64,
    4. on ``ppc64*`` machines, the macros predefined by the configured
       compiler when invoked with ``-mcpu=native``,
    5. otherwise the CPU flags reported by the ``cpuinfo`` package.

    Returns:
        A new list of instruction set names (callers may mutate it freely),
        or ``None`` if ``cpuinfo`` is needed but cannot be imported.

    Raises:
        OSError: if the SVE vector length query via ``prctl`` fails.
        subprocess.CalledProcessError: if the compiler probe on ppc64 exits
            with a non-zero status.
    """
    global _cache
    if _cache is not None:
        return _cache.copy()
    if 'PYSTENCILS_SIMD' in os.environ:
        return os.environ['PYSTENCILS_SIMD'].split(',')
    if platform.system() == 'Darwin' and platform.machine() == 'arm64':  # not supported by cpuinfo
        return ['neon']
    elif platform.machine().startswith('ppc64'):  # no flags reported by cpuinfo
        import subprocess
        import tempfile
        from pystencils.cpu.cpujit import get_compiler_config
        # Preprocess an empty source file and inspect the predefined macros.
        # The context manager guarantees the temporary file is closed (and
        # thereby removed) even if the compiler invocation raises.
        with tempfile.NamedTemporaryFile(suffix='.cpp') as f:
            command = [get_compiler_config()['command'], '-mcpu=native', '-dM', '-E', f.name]
            macros = subprocess.check_output(command, input='', text=True)
        if '#define __VSX__' in macros and '#define __ALTIVEC__' in macros:
            _cache = ['vsx']
        else:
            _cache = []
        return _cache.copy()
    try:
        from cpuinfo import get_cpu_info
    except ImportError:
        return None

    # Instruction sets that are reported as soon as all listed CPU flags are
    # present; the order here is the order in the returned list.
    flag_requirements = (
        ("sse", {'sse', 'sse2', 'ssse3', 'sse4_1', 'sse4_2'}),
        ("avx", {'avx', 'avx2'}),
        ("avx512", {'avx512f'}),
        ("neon", {'neon'}),
    )
    required_sve_flags = {'sve'}
    flags = set(get_cpu_info()['flags'])
    result = [name for name, required in flag_requirements if flags.issuperset(required)]

    if flags.issuperset(required_sve_flags):
        if platform.system() == 'Linux':
            libc = CDLL('libc.so.6')
            vl_info = libc.prctl(51, 0, 0, 0, 0)  # PR_SVE_GET_VL
            if vl_info < 0:
                raise OSError("SVE length query failed")
            # The kernel may OR flag bits (e.g. PR_SVE_VL_INHERIT) into the
            # return value; the vector length in bytes is in the low 16 bits
            # (PR_SVE_VL_LEN_MASK). Multiply by 8 to get the length in bits.
            native_length = 8 * (vl_info & 0xFFFF)
            # Largest power of two that does not exceed the native length.
            pwr2_length = int(2**math.floor(math.log2(native_length)))
            if pwr2_length % 256 == 0:
                result.append(f"sve{pwr2_length//2}")
            if native_length != pwr2_length:
                result.append(f"sve{pwr2_length}")
            result.append(f"sve{native_length}")
        else:
            # No length query available here: report the length-agnostic name.
            result.append("sve")

    # Remember the answer so later calls skip the CPU query, and hand out a
    # copy so callers cannot modify the cached list.
    _cache = result
    return _cache.copy()

80 

81 

def get_cacheline_size(instruction_set):
    """Get the size (in bytes) of a cache block that can be zeroed without memory access.

    Usually, this is identical to the cache line size.

    The value is obtained by compiling and running a tiny pystencils kernel
    that writes the ``CachelineSize`` symbol into a one-element array. The
    result is stored in a module-level variable and reused by later calls.

    Args:
        instruction_set: name of the vector instruction set to query.

    Returns:
        The measured size as an ``int``, or ``None`` if the description of
        ``instruction_set`` has no ``'cachelineSize'`` entry.
    """
    global _cachelinesize

    # The instruction set is checked on every call, before the cached value
    # is consulted.
    if 'cachelineSize' not in get_vector_instruction_set('double', instruction_set):
        return None

    if _cachelinesize is None:
        import pystencils as ps
        import numpy as np

        out = np.zeros((1, 1), dtype=np.float32)
        field = ps.Field.create_from_numpy_array('f', out, index_dimensions=0)
        cacheline_node = ps.astnodes.CachelineSize
        assignments = [cacheline_node(), ps.Assignment(field.center, cacheline_node.symbol)]
        kernel_ast = ps.create_kernel(assignments, cpu_vectorize_info={'instruction_set': instruction_set})
        compiled_kernel = kernel_ast.compile()
        # The kernel writes the cache line size into the single array element.
        compiled_kernel(**{field.name: out, cacheline_node.symbol.name: 0})
        _cachelinesize = int(out[0, 0])

    return _cachelinesize