Coverage for src/foapy/core/_alphabet.py: 100%
16 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-17 20:45 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-17 20:45 +0000
1import numpy as np
2from numpy import ndarray
4from foapy.exceptions import Not1DArrayException
def alphabet(X) -> ndarray:
    """
    Get an alphabet - a list of unique values from an array in order of their first appearance.

    The alphabet is constructed by scanning the input array from left to right and adding each new
    unique value encountered. This preserves the order of the first appearance of each element, which
    can be important for maintaining relationships between elements in the original sequence.

    | Input array X | Alphabet  | Note                                   |
    |---------------|-----------|----------------------------------------|
    | [ b a b c ]   | [ b a c ] | 'b' appears before 'a'                 |
    | [ a b c b ]   | [ a b c ] | Same values but 'a' appears before 'b' |
    | [ 2 1 3 2 1 ] | [ 2 1 3 ] | 2 appears first, then 1, then 3        |
    | [ ]           | [ ]       | Empty alphabet                         |

    Parameters
    ----------
    X : array_like
        Array to extract an alphabet from. Must be a 1-dimensional array.

    Returns
    -------
    : ndarray
        Alphabet of X - array of unique values in order of their first appearance

    Raises
    ------
    Not1DArrayException
        When X parameter is not a 1-dimensional array (this includes
        0-dimensional scalars as well as arrays with 2 or more dimensions)

    Examples
    --------
    Get an alphabet from a sequence of characters.
    Note that the alphabet contains unique values in order of first appearance:

    ``` py linenums="1"
    import foapy
    source = ['a', 'c', 'c', 'e', 'd', 'a']
    alphabet = foapy.alphabet(source)
    print(alphabet)
    # ['a', 'c', 'e', 'd']
    ```

    An alphabet of an empty sequence is an empty array:

    ``` py linenums="1"
    import foapy
    source = []
    alphabet = foapy.alphabet(source)
    print(alphabet)
    # []
    ```

    Getting an alphabet from an array with more than 1 dimension is not allowed:

    ``` py linenums="1"
    import foapy
    source = [[[1], [3]], [[6], [9]], [[6], [3]]]
    alphabet = foapy.alphabet(source)
    # Not1DArrayException: {'message': 'Incorrect array form. Expected d1 array, exists 3'}
    ```
    """  # noqa: E501

    data = np.asanyarray(X)
    # Accept exactly one dimension. A 0-d (scalar) input has to be rejected
    # here too: the slice assignments on the masks below are only valid for
    # 1-D arrays and would otherwise fail with an unrelated IndexError.
    if data.ndim != 1:
        raise Not1DArrayException(
            {"message": f"Incorrect array form. Expected d1 array, exists {data.ndim}"}
        )

    # A stable sort keeps equal values in their original relative order, so
    # the first entry of every run of equal values in sorted order is the
    # earliest occurrence of that value in the input.
    perm = data.argsort(kind="mergesort")

    # unique_mask[i] is True when sorted position i starts a new run of values.
    unique_mask = np.empty(data.shape, dtype=bool)
    unique_mask[:1] = True
    unique_mask[1:] = data[perm[1:]] != data[perm[:-1]]

    # Map the run starts back to input positions; selecting with a boolean
    # mask then yields the values in their original left-to-right order.
    result_mask = np.full_like(unique_mask, False)
    result_mask[:1] = True  # position 0 is always a first appearance
    result_mask[perm[unique_mask]] = True
    return data[result_mask]