Issue
As stated in the title, I want to make a calculation where, instead of multiplying corresponding elements, I binary-XOR them and then add the results. Example for illustration:
EDIT: The big picture above IS the calculation, but here we go: take the first row of the left matrix [1 0 1] and the first column of the top matrix [1 0 0]. 1 XOR 1 = 0, 0 XOR 0 = 0, 1 XOR 0 = 1. Add them all: 0 + 0 + 1 = 1. First row of the left matrix [1 0 1], second column [0 0 0]: 1 XOR 0 = 1, 0 XOR 0 = 0, 1 XOR 0 = 1. Add them all: 1 + 0 + 1 = 2. And so on.
Is it possible to do that in numpy?
Solution
You can combine two loops with a NumPy 1-D XOR-sum, like below:
import numpy as np

m1 = np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]])
m2 = np.array([[1, 0, 1], [0, 0, 1], [1, 1, 1]])

# XOR "matrix product": mr[i, j] = sum over k of (m1[k, j] ^ m2[i, k]).
# Broadcasting replaces the two Python-level loops with one vectorized
# expression: m2[:, :, None] has shape (i, k, 1) and m1[None, :, :] has
# shape (1, k, j); their XOR is an (i, k, j) cube which is reduced over
# the shared k axis.  dtype=np.int64 keeps the result type identical on
# all platforms.  NOTE: this materializes the (i, k, j) intermediate, so
# for very large matrices a loop-based (e.g. Numba) kernel uses less memory.
mr = (m2[:, :, None] ^ m1[None, :, :]).sum(axis=1, dtype=np.int64)
print(mr)
Output:
[[1 2 2]
[2 1 1]
[2 3 3]]
As @MadPhysicist suggested, you can use the Numba JIT optimizer (`pip install numba`) to boost the code above, and you'll get very fast code for your operations with small memory consumption:
import numpy as np, numba

@numba.njit(cache = True)
def matxor(m1, m2):
    # XOR-then-sum "matrix product": out[r, c] = sum over k of
    # m1[k, c] ^ m2[r, k].  Same contract as the pure-NumPy version,
    # but JIT-compiled by Numba.
    n_rows, n_cols = m2.shape[0], m1.shape[1]
    out = np.empty((n_rows, n_cols), dtype = np.int64)
    for r in range(n_rows):
        row = m2[r, :]
        for c in range(n_cols):
            out[r, c] = np.sum(m1[:, c] ^ row)
    return out

m1 = np.array([[1, 0, 0], [0, 0, 0], [0, 0, 0]])
m2 = np.array([[1, 0, 1], [0, 0, 1], [1, 1, 1]])
print(matxor(m1, m2))
The Numba code above can also be sped up by up to a further 44x thanks to the following great improvements suggested and coded by @max9111:
import numpy as np, numba
# np.random.randint samples from the half-open interval [low, high), so the
# original high=1 produced all-zero matrices and benchmarked a degenerate
# input.  high=2 yields genuine random 0/1 matrices as intended.
m1 = np.random.randint(low=0, high=2, size=1_000_000).reshape(1_000, 1_000)
m2 = np.random.randint(low=0, high=2, size=1_000_000).reshape(1_000, 1_000)
#@Arty
@numba.njit(cache = True)
def matxor_1(m1, m2):
    # Baseline kernel: two explicit loops; each cell is the XOR-then-sum
    # of one column of m1 with one row of m2.
    n_rows, n_cols = m2.shape[0], m1.shape[1]
    res = np.empty((n_rows, n_cols), dtype = np.int64)
    for r in range(n_rows):
        for c in range(n_cols):
            res[r, c] = np.sum(m1[:, c] ^ m2[r, :])
    return res
# IPython magic — run inside IPython/Jupyter; the comment below records the measured time.
%timeit matxor_1(m1, m2)
#1.06 s ± 9.39 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Aligned memory access (a real transpose via ascontiguousarray is important)
@numba.njit(cache = True)
def matxor_2(m1, m2):
    # Copying m1.T into a contiguous array lets the inner XOR read both
    # operands row-wise, which is much friendlier to the CPU cache.
    m1_cols = np.ascontiguousarray(m1.T)
    res = np.empty((m2.shape[0], m1.shape[1]), dtype = np.int64)
    for r in range(res.shape[0]):
        for c in range(res.shape[1]):
            res[r, c] = np.sum(m1_cols[c, :] ^ m2[r, :])
    return res
# IPython magic — run inside IPython/Jupyter; the comment below records the measured time.
%timeit matxor_2(m1, m2)
#312 ms ± 7.05 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
#Writing out the inner loop
@numba.njit(fastmath=True, cache = True)
def matxor_3(m1, m2):
    # Replacing the temporary XOR array + np.sum with a scalar
    # accumulator lets the compiler keep the reduction in registers.
    m1_cols = np.ascontiguousarray(m1.T)
    res = np.empty((m2.shape[0], m1.shape[1]), dtype = np.int64)
    for r in range(res.shape[0]):
        for c in range(res.shape[1]):
            total = 0
            for k in range(m2.shape[1]):
                total += m1_cols[c, k] ^ m2[r, k]
            res[r, c] = total
    return res
# IPython magic — run inside IPython/Jupyter; the comment below records the measured time.
%timeit matxor_3(m1, m2)
#125 ms ± 3.85 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
#Parallelization
@numba.njit(fastmath=True, cache = True, parallel=True)
def matxor_4(m1, m2):
    # Same scalar-accumulator kernel, with the outer loop distributed
    # across CPU cores via numba.prange.
    m1_cols = np.ascontiguousarray(m1.T)
    res = np.empty((m2.shape[0], m1.shape[1]), dtype = np.int64)
    for r in numba.prange(res.shape[0]):
        for c in range(res.shape[1]):
            total = 0
            for k in range(m2.shape[1]):
                total += m1_cols[c, k] ^ m2[r, k]
            res[r, c] = total
    return res
# IPython magic — run inside IPython/Jupyter; the comment below records the measured time.
%timeit matxor_4(m1, m2)
#23.8 ms ± 711 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Sanity checks: all four implementations agree on the same input matrices.
print(np.allclose(matxor_1(m1, m2),matxor_2(m1, m2)))
#True
print(np.allclose(matxor_1(m1, m2),matxor_3(m1, m2)))
#True
print(np.allclose(matxor_1(m1, m2),matxor_4(m1, m2)))
#True
Answered By - Arty
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.