﻿ python – 使用第二个数组作为参考对numpy数组的元素进行分类 - 代码日志

#### python – 使用第二个数组作为参考对numpy数组的元素进行分类

``````data = array([30, 20, 30, 10, 20, 10, 20, 10, 30, 20, 20, 30, 30, 10, 30])
``````

``````reference = array([20, 10, 30])
``````

``````data = reference[indexes]
``````

``````indexes = np.zeros_like(data, dtype=int)
# For every element of data, look up its position in reference.
for i in range(data.size):
    # np.where returns a tuple of index arrays; [0] is the array of matches.
    # NOTE: the assignment assumes exactly one match per element -- confirm
    # that reference holds each value of data exactly once.
    indexes[i] = np.where(data[i] == reference)[0]
``````

``````In [375]: data
Out[375]: array([30, 20, 30, 10, 20, 10, 20, 10, 30, 20, 20, 30, 30, 10, 30])

In [376]: reference
Out[376]: array([20, 10, 30])
``````

``````In [373]: np.sort(reference)
Out[373]: array([10, 20, 30])
``````

``````In [378]: np.searchsorted(np.sort(reference), data, side='left')
Out[378]: array([2, 1, 2, 0, 1, 0, 1, 0, 2, 1, 1, 2, 2, 0, 2], dtype=int64)
``````

``````In [379]: indexes
Out[379]: array([2, 0, 2, 1, 0, 1, 0, 1, 2, 0, 0, 2, 2, 1, 2])
``````

``````# Get sorting indices for reference
sort_idx = np.argsort(reference)

# Sort reference and get searchsorted indices for data in reference
# NOTE: assumes every value in data occurs in reference; otherwise pos may
# not point at an equal element (or may equal reference.size).
pos = np.searchsorted(reference[sort_idx], data, side='left')

# Map positions in the sorted reference back to indices into the original
# reference, reusing sort_idx instead of running argsort a second time
out = sort_idx[pos]
``````

``````In [396]: data = np.random.randint(0,30000,150000)
...: reference = np.unique(data)
...: reference = reference[np.random.permutation(reference.size)]
...:
...:
...: def org_approach(data,reference):
...:     # Baseline: one np.where lookup in reference per element of data
...:     indexes = np.zeros_like(data, dtype=int)
...:     for i in range(data.size):
...:         # [0] selects the index array from the tuple returned by np.where
...:         indexes[i] = np.where(data[i] == reference)[0]
...:     return indexes
...:
...: def vect_approach(data,reference):
...:     # Vectorized lookup: search data in the sorted reference, then map back
...:     sort_idx = np.argsort(reference)
...:     pos = np.searchsorted(reference[sort_idx], data, side='left')
...:     # sort_idx[pos] turns sorted-order positions into original reference indices
...:     return sort_idx[pos]
...:

In [397]: %timeit org_approach(data,reference)
1 loops, best of 3: 9.86 s per loop

In [398]: %timeit vect_approach(data,reference)
10 loops, best of 3: 32.4 ms per loop
``````

``````In [399]: np.array_equal(org_approach(data,reference),vect_approach(data,reference))
Out[399]: True
``````