Coverage for src/foapy/characteristics/_identifying_information.py: 100%

14 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-05-17 20:45 +0000

1import numpy as np 

2 

3 

def identifying_information(intervals_grouped, dtype=None):
    """
    Calculates the amount of identifying information (Amount of Information / Entropy)
    of intervals grouped by element of the alphabet.

    $$H=\\frac {1} {n} * \\sum_{j=1}^{m}{(n_j * \\log_2 \\sum_{i=1}^{n_j} \\frac{\\Delta_{ij}}{n_j})}$$

    where \\( m \\) is count of groups (alphabet power), \\( n_j \\) is count of intervals in group \\( j \\),
    \\( \\Delta_{ij} \\) represents an interval at index \\( i \\) in group \\( j \\) and \\( n \\) is total count of intervals across all groups.

    $$n=\\sum_{j=1}^{m}{n_j} $$

    Groups without intervals contribute nothing to the sum; if there are no
    groups at all (or every group is empty) the result is 0.

    Parameters
    ----------
    intervals_grouped : array_like
        An array of intervals grouped by element. It is iterated twice, so it
        must be a re-iterable sequence (list, tuple, array), not a one-shot
        iterator.
    dtype : dtype, optional
        The dtype of the output. It is forwarded to `np.sum` and `np.log2`.

    Returns
    -------
    : float
        The identifying information of the input array of intervals_grouped.

    Examples
    --------

    Calculate the identifying information of intervals_grouped of a sequence.

    ``` py linenums="1"
    import foapy
    import numpy as np

    source = np.array(['a', 'b', 'a', 'c', 'a', 'd'])
    order = foapy.ma.order(source)
    print(order)

    #[[0 -- 0 -- 0 --]
    # [-- 1 -- -- -- --]
    # [-- -- -- 2 -- --]
    # [-- -- -- -- -- 3]]

    intervals_grouped = foapy.ma.intervals(order, foapy.binding.start, foapy.mode.normal)

    print(intervals_grouped)
    # [
    #   array([1, 2, 2]),
    #   array([2]),
    #   array([4]),
    #   array([6])
    # ]

    # m = 4
    # n_0 = 3
    # n_1 = 1
    # n_2 = 1
    # n_3 = 1
    # n = 6

    result = foapy.characteristics.identifying_information(intervals_grouped)
    print(result)
    # 1.299309880536629

    # Improve precision by specifying a dtype.
    result = foapy.characteristics.identifying_information(intervals_grouped, dtype=np.longdouble)
    print(result)
    # 1.2993098805366290618
    ```
    """  # noqa: E501

    # Only the total number of intervals is needed, so add up the group sizes
    # instead of concatenating every group into a temporary array. This also
    # lets an empty collection of groups fall through to a result of 0 rather
    # than failing inside np.concatenate.
    n = sum(len(group) for group in intervals_grouped)

    identifying_information_values = []

    for group in intervals_grouped:
        n_j = len(group)
        if n_j == 0:
            # An empty group has no average interval; skip the log entirely.
            identifying_information_values.append(0)
            continue

        average_value = np.sum(group, dtype=dtype) / n_j
        log_average = np.log2(average_value, dtype=dtype)
        # Weight the group's log-average by its share of all intervals.
        identifying_information_values.append(n_j / n * log_average)

    return np.sum(identifying_information_values, dtype=dtype)