Coverage for src/foapy/characteristics/_identifying_information.py: 100%
14 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-17 20:45 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-17 20:45 +0000
1import numpy as np
def identifying_information(intervals_grouped, dtype=None):
    """
    Compute the amount of identifying information (Amount of Information / Entropy)
    for intervals grouped by element of the alphabet.

    $$H=\\frac {1} {n} * \\sum_{j=1}^{m}{(n_j * \\log_2 \\sum_{i=1}^{n_j} \\frac{\\Delta_{ij}}{n_j})}$$

    where \\( m \\) is the count of groups (alphabet power), \\( n_j \\) is the count of intervals in group \\( j \\),
    \\( \\Delta_{ij} \\) is the interval at index \\( i \\) in group \\( j \\) and \\( n \\) is the total count of intervals across all groups.

    $$n=\\sum_{j=1}^{m}{n_j} $$

    A group without intervals contributes zero to the sum.

    Parameters
    ----------
    intervals_grouped : array_like
        An array of intervals grouped by element
    dtype : dtype, optional
        The dtype used for the intermediate sums, the logarithm and the output

    Returns
    -------
    : float
        The identifying information of the input array of intervals_grouped.

    Examples
    --------

    Calculate the identifying information of intervals_grouped of a sequence.

    ``` py linenums="1"
    import foapy
    import numpy as np

    source = np.array(['a', 'b', 'a', 'c', 'a', 'd'])
    order = foapy.ma.order(source)
    print(order)

    #[[0 -- 0 -- 0 --]
    # [-- 1 -- -- -- --]
    # [-- -- -- 2 -- --]
    # [-- -- -- -- -- 3]]

    intervals_grouped = foapy.ma.intervals(order, foapy.binding.start, foapy.mode.normal)

    print(intervals_grouped)
    # [
    #   array([1, 2, 2]),
    #   array([2]),
    #   array([4]),
    #   array([6])
    # ]

    # m = 4
    # n_0 = 3
    # n_1 = 1
    # n_2 = 1
    # n_3 = 1
    # n = 6

    result = foapy.characteristics.identifying_information(intervals_grouped)
    print(result)
    # 1.299309880536629

    # Improve precision by specifying a dtype.
    result = foapy.characteristics.identifying_information(intervals_grouped, dtype=np.longdouble)
    print(result)
    # 1.2993098805366290618
    ```
    """  # noqa: E501

    # Total number of intervals across every group (n in the formula).
    total_count = len(np.concatenate(intervals_grouped))

    def group_term(group):
        # Contribution of one group: (n_j / n) * log2(mean interval of the group).
        group_size = len(group)
        if group_size == 0:
            # An empty group has no mean interval; it adds nothing to the sum.
            return 0
        mean_interval = np.sum(group, dtype=dtype) / group_size
        log_mean = np.log2(mean_interval, dtype=dtype)
        return group_size / total_count * log_mean

    terms = [group_term(group) for group in intervals_grouped]
    return np.sum(terms, dtype=dtype)