Coverage for src/foapy/core/_alphabet.py: 100%

16 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-05-17 20:45 +0000

1import numpy as np 

2from numpy import ndarray 

3 

4from foapy.exceptions import Not1DArrayException 

5 

6 

def alphabet(X) -> ndarray:
    """
    Get an alphabet - a list of unique values from an array in order of their first appearance.

    The alphabet is constructed by scanning the input array from left to right and adding each new
    unique value encountered. This preserves the order of the first appearance of each element, which
    can be important for maintaining relationships between elements in the original sequence.

    | Input array X | Alphabet  | Note                                   |
    |---------------|-----------|----------------------------------------|
    | [ b a b c ]   | [ b a c ] | 'b' appears before 'a'                 |
    | [ a b c b ]   | [ a b c ] | Same values but 'a' appears before 'b' |
    | [ 2 1 3 2 1 ] | [ 2 1 3 ] | 2 appears first, then 1, then 3        |
    | [ ]           | [ ]       | Empty alphabet                         |

    Parameters
    ----------
    X : array_like
        Array to extract an alphabet from. Must be a 1-dimensional array.

    Returns
    -------
    : ndarray
        Alphabet of X - array of unique values in order of their first appearance

    Raises
    ------
    Not1DArrayException
        When X parameter is not a 1-dimensional array. This includes both
        arrays with more than one dimension and 0-dimensional (scalar) input.

    Examples
    --------
    Get an alphabet from a sequence of characters.
    Note that the alphabet contains unique values in order of first appearance:

    ``` py linenums="1"
    import foapy
    source = ['a', 'c', 'c', 'e', 'd', 'a']
    alphabet = foapy.alphabet(source)
    print(alphabet)
    # ['a', 'c', 'e', 'd']
    ```

    An alphabet of an empty sequence is an empty array:

    ``` py linenums="1"
    import foapy
    source = []
    alphabet = foapy.alphabet(source)
    print(alphabet)
    # []
    ```

    Getting an alphabet from an array with more than 1 dimension is not allowed:

    ``` py linenums="1"
    import foapy
    source = [[[1], [3]], [[6], [9]], [[6], [3]]]
    alphabet = foapy.alphabet(source)
    # Not1DArrayException: {'message': 'Incorrect array form. Expected d1 array, exists 3'}
    ```
    """  # noqa: E501

    data = np.asanyarray(X)
    # Accept exactly one dimension. A 0-d (scalar) input has shape (), so the
    # slice assignments on the masks below would fail with a bare IndexError;
    # reject it here with the documented exception instead.
    if data.ndim != 1:
        raise Not1DArrayException(
            {"message": f"Incorrect array form. Expected d1 array, exists {data.ndim}"}
        )

    # Stable sort: equal values become adjacent and, inside each run of equal
    # values, original indices stay in ascending order, so the first index of
    # every run is the value's first appearance in the input.
    perm = data.argsort(kind="mergesort")

    # True at each sorted position that starts a new run of equal values.
    # The `[:1]` slice (rather than `[0]`) keeps this a no-op for empty input.
    unique_mask = np.empty(data.shape, dtype=bool)
    unique_mask[:1] = True
    unique_mask[1:] = data[perm[1:]] != data[perm[:-1]]

    # Translate run starts back to positions in the original array; selecting
    # with a boolean mask returns the values in their original order.
    result_mask = np.zeros(data.shape, dtype=bool)
    result_mask[:1] = True
    result_mask[perm[unique_mask]] = True
    return data[result_mask]