Issue
Consider the array:
import numpy as np
import numpy_indexed as npi
from itertools import permutations
arr = np.array([[1, 2, 3, 4],
[3, 3, 3, 6],
[2, 0, 0, 2],
[2, 0, 0, 2],
[8, 2, 8, 2],
[4, 5, 4, 5],
[3, 3, 3, 6],
[4, 5, 4, 5],
[0, 9, 8, 7],
[1, 2, 3, 4]])
I need to find all unique row permutations. What I currently do is find all 10! row permutations, then use npi.unique
. Like this:
arr_perms = np.array([arr[i, :] for i in permutations(range(len(arr)))])
u, index = npi.unique(arr_perms, return_index = True, axis=0)
This works as expected, but it seems expensive because of the nature of the arrays I am working with. These arrays all have at least one pair (usually several pairs) of identical rows.
In the small example shown, the 10 rows hold 4 identical row pairs, so the total number of unique row permutqtions is only 10!/2^4 = 226800
, a big reduction.
QUESTION: Is there a way to efficiently find the unique row permutations without first having to find the full set of permutations?
Solution
This here might help you. It still creates all permutations, but it is 10 times faster than the code you posted, since each subarray of length 4 is converted to an int using bit shifting.
import math
import numpy as np
import numpy_indexed as npi
def generate_permutations_matrix(n, k):
"""
Generate a matrix of permutations for given values of n and k.
Parameters:
- n (int): Total number of elements.
- k (int): Length of each permutation.
Returns:
- np.ndarray: Matrix of permutations.
"""
result_matrix = np.zeros((math.perm(n, k), k), np.uint8)
f = 1
for m in range(n - k + 1, n + 1):
sub_matrix = result_matrix[:f, n - m + 1:]
for i in range(1, m):
result_matrix[i * f: (i + 1) * f, n - m] = i
result_matrix[i * f: (i + 1) * f, n - m + 1:] = sub_matrix + (
sub_matrix >= i
)
sub_matrix += 1
f *= m
return result_matrix
def convert_to_integer(arr):
"""
Convert a 2D array of integers to a single integer value.
Parameters:
- arr (np.ndarray): Input 2D array.
Returns:
- int: Integer representation of the array.
"""
arr = arr.astype(np.uint64)
integer_value = arr[..., 0].copy()
for ival in range(1, arr.shape[1]):
integer_value += arr[..., ival] << ival * 8
return integer_value
def get_permutations(arr, len_of_each):
"""
Get unique permutations of rows in a 2D array.
Parameters:
- arr (np.ndarray): Input 2D array.
- len_of_each (int): Length of each permutation.
Returns:
- np.ndarray: Permutations matrix.
"""
integer_value = convert_to_integer(arr)
# Create dictionaries for mapping integer values to unique identifiers
lookup_dict = {}
lookup_dict2 = {}
alpha1 = []
identifier = 0
# Iterate through integer values and create mappings
for ini, x in enumerate(integer_value):
if x in lookup_dict:
value = lookup_dict.get(x)
alpha1.append((value, x))
lookup_dict2[ini] = value
else:
alpha1.append((identifier, x))
lookup_dict[x] = identifier
lookup_dict2[ini] = identifier
identifier += 1
# Generate permutations matrix
permutations_matrix = generate_permutations_matrix(len(alpha1), len_of_each)
# Map original identifiers back to permutations matrix
cond_list = []
choice_list = []
for nom in lookup_dict2.items():
cond_list.append(permutations_matrix == nom[0])
choice_list.append(nom[1])
choice_list = np.array(choice_list, dtype=np.uint8)
unique_permutations = npi.unique(
np.select(cond_list, choice_list, 0), return_index=False, axis=0
)
# Map back to original identifiers
cond_list2 = []
choice_list2 = []
for nom in lookup_dict.items():
cond_list2.append(unique_permutations == nom[1])
choice_list2.append(nom[0])
result = np.select(cond_list2, choice_list2, 0)
# Convert back to the original 2D array format
return np.stack(
[
np.dstack([(result[..., g] >> rx * 8) & 255 for rx in range(arr.shape[1])])
for g in range(len_of_each)
],
axis=2,
).squeeze()
# Example usage
arr = np.array(
[
[1, 2, 3, 4],
[3, 3, 3, 6],
[2, 0, 0, 2],
[2, 0, 0, 2],
[8, 2, 8, 2],
[4, 5, 4, 5],
[3, 3, 3, 6],
[4, 5, 4, 5],
[0, 9, 8, 7],
[1, 2, 3, 4],
]
)
result_permutations = get_permutations(arr=arr, len_of_each=10)
from itertools import permutations
def test(arr):
arr_perms = np.array([arr[i, :] for i in permutations(range(len(arr)))])
u, index = npi.unique(arr_perms, return_index=True, axis=0)
return u, index
%timeit result_permutations = get_permutations(arr=arr, len_of_each=10)
1.52 s ± 27.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit test(arr)
13.6 s ± 52.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Answered By - Hans
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.