1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -49,6 +49,7 @@ Also, that release drops support for Python 3.9, making Python 3.10 the minimum
* Aligned `dpnp.trim_zeros` with NumPy 2.4 to support a tuple of integers passed with `axis` keyword [#2746](https://github.com/IntelPython/dpnp/pull/2746)
* Aligned `strides` property of `dpnp.ndarray` with NumPy and CuPy implementations [#2747](https://github.com/IntelPython/dpnp/pull/2747)
* Extended `dpnp.nan_to_num` to support broadcasting of `nan`, `posinf`, and `neginf` keywords [#2754](https://github.com/IntelPython/dpnp/pull/2754)
+* Changed `dpnp.partition` implementation to reuse `dpnp.sort` where it brings the performance benefit [#2766](https://github.com/IntelPython/dpnp/pull/2766)

### Deprecated

89 changes: 13 additions & 76 deletions dpnp/backend/kernels/dpnp_krnl_sorting.cpp
@@ -70,90 +70,27 @@ DPCTLSyclEventRef dpnp_partition_c(DPCTLSyclQueueRef q_ref,

    sycl::queue q = *(reinterpret_cast<sycl::queue *>(q_ref));

-    if (ndim == 1) // 1d array with C-contiguous data
-    {
-        _DataType *arr = static_cast<_DataType *>(array1_in);
-        _DataType *result = static_cast<_DataType *>(result1);
+    _DataType *arr = static_cast<_DataType *>(array1_in);
+    _DataType *result = static_cast<_DataType *>(result1);

-        auto policy = oneapi::dpl::execution::make_device_policy<
-            dpnp_partition_c_kernel<_DataType>>(q);
+    auto policy = oneapi::dpl::execution::make_device_policy<
+        dpnp_partition_c_kernel<_DataType>>(q);

-        // fill the result array with data from input one
-        q.memcpy(result, arr, size * sizeof(_DataType)).wait();
+    // fill the result array with data from input one
+    q.memcpy(result, arr, size * sizeof(_DataType)).wait();

-        // make a partial sorting such that:
+    // note, a loop for a multidemension input array (size_ > 1) is an
+    // experimental and it isn't tested properly as for now
+    for (size_t i = 0; i < size_; i++) {
+        _DataType *bufptr = result + i * shape_[0];
+
+        // for every slice it makes a partial sorting such that:
        // 1. result[0 <= i < kth] <= result[kth]
        // 2. result[kth <= i < size] >= result[kth]
        // event-blocking call, no need for wait()
-        std::nth_element(policy, result, result + kth, result + size,
+        std::nth_element(policy, bufptr, bufptr + kth, bufptr + size,
                         dpnp_less_comp());
-        return event_ref;
    }

-    DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size, true);
-    DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, array2_in, size, true);
-    DPNPC_ptr_adapter<_DataType> result1_ptr(q_ref, result1, size, true, true);
-    _DataType *arr = input1_ptr.get_ptr();
-    _DataType *arr2 = input2_ptr.get_ptr();
-    _DataType *result = result1_ptr.get_ptr();
-
-    auto arr_to_result_event = q.memcpy(result, arr, size * sizeof(_DataType));
-    arr_to_result_event.wait();
-
-    _DataType *matrix = new _DataType[shape_[ndim - 1]];
-
-    for (size_t i = 0; i < size_; ++i) {
-        size_t ind_begin = i * shape_[ndim - 1];
-        size_t ind_end = (i + 1) * shape_[ndim - 1] - 1;
-
-        for (size_t j = ind_begin; j < ind_end + 1; ++j) {
-            size_t ind = j - ind_begin;
-            matrix[ind] = arr2[j];
-        }
-        std::partial_sort(matrix, matrix + shape_[ndim - 1],
-                          matrix + shape_[ndim - 1], dpnp_less_comp());
-        for (size_t j = ind_begin; j < ind_end + 1; ++j) {
-            size_t ind = j - ind_begin;
-            arr2[j] = matrix[ind];
-        }
-    }
-
-    shape_elem_type *shape = reinterpret_cast<shape_elem_type *>(
-        sycl::malloc_shared(ndim * sizeof(shape_elem_type), q));
-    auto memcpy_event = q.memcpy(shape, shape_, ndim * sizeof(shape_elem_type));
-
-    memcpy_event.wait();
-
-    sycl::range<2> gws(size_, kth + 1);
-    auto kernel_parallel_for_func = [=](sycl::id<2> global_id) {
-        size_t j = global_id[0];
-        size_t k = global_id[1];
-
-        _DataType val = arr2[j * shape[ndim - 1] + k];
-
-        for (size_t i = 0; i < static_cast<size_t>(shape[ndim - 1]); ++i) {
-            if (result[j * shape[ndim - 1] + i] == val) {
-                _DataType change_val1 = result[j * shape[ndim - 1] + i];
-                _DataType change_val2 = result[j * shape[ndim - 1] + k];
-                result[j * shape[ndim - 1] + k] = change_val1;
-                result[j * shape[ndim - 1] + i] = change_val2;
-            }
-        }
-    };
-
-    auto kernel_func = [&](sycl::handler &cgh) {
-        cgh.depends_on({memcpy_event});
-        cgh.parallel_for<class dpnp_partition_c_kernel<_DataType>>(
-            gws, kernel_parallel_for_func);
-    };
-
-    auto event = q.submit(kernel_func);
-
-    event.wait();
-
-    delete[] matrix;
-    sycl::free(shape, q);
-
    return event_ref;
}

41 changes: 30 additions & 11 deletions dpnp/dpnp_array.py
@@ -1459,35 +1459,54 @@ def nonzero(self):

    def partition(self, /, kth, axis=-1, kind="introselect", order=None):
        """
-        Return a partitioned copy of an array.
+        Partially sorts the elements in the array in such a way that the value
+        of the element in k-th position is in the position it would be in a
+        sorted array. In the output array, all elements smaller than the k-th
+        element are located to the left of this element and all equal or
+        greater are located to its right. The ordering of the elements in the
+        two partitions on the either side of the k-th element in the output
+        array is undefined.

-        Rearranges the elements in the array in such a way that the value of
-        the element in `kth` position is in the position it would be in
-        a sorted array.
+        Refer to `dpnp.partition` for full documentation.

-        All elements smaller than the `kth` element are moved before this
-        element and all equal or greater are moved behind it. The ordering
-        of the elements in the two partitions is undefined.
+        kth : {int, sequence of ints}
+            Element index to partition by. The kth element value will be in its
+            final sorted position and all smaller elements will be moved before
+            it and all equal or greater elements behind it.
+            The order of all elements in the partitions is undefined. If
+            provided with a sequence of kth it will partition all elements
+            indexed by kth of them into their sorted position at once.
+        axis : int, optional
+            Axis along which to sort. The default is ``-1``, which means sort
+            along the the last axis.

-        Refer to `dpnp.partition` for full documentation.
+            Default: ``-1``.

        See Also
        --------
        :obj:`dpnp.partition` : Return a partitioned copy of an array.
+        :obj:`dpnp.argpartition` : Indirect partition.
+        :obj:`dpnp.sort` : Full sort.

        Examples
        --------
        >>> import dpnp as np
        >>> a = np.array([3, 4, 2, 1])
        >>> a.partition(3)
        >>> a
        array([1, 2, 3, 4]) # may vary
+
+        >>> a.partition((1, 3))
+        >>> a
+        array([1, 2, 3, 4])

        """

-        self._array_obj = dpnp.partition(
-            self, kth, axis=axis, kind=kind, order=order
-        ).get_array()
+        if axis is None:
+            raise TypeError(
+                "'NoneType' object cannot be interpreted as an integer"
+            )
+        self[...] = dpnp.partition(self, kth, axis=axis, kind=kind, order=order)
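As a quick usage sketch of the in-place method shown above (assuming a dpnp installation with a default SYCL device available): the partitioned result is written back into the array itself, and, per the new implementation, axis=None is rejected with a TypeError.

import dpnp as np

a = np.array([3, 4, 2, 1])
# In-place partition: afterwards a[2] holds the value it would have in a
# fully sorted array, and everything before it is less than or equal to it.
a.partition(2)
print(a)  # e.g. array([2, 1, 3, 4]); order inside each partition may vary

# Unlike dpnp.partition, which flattens on axis=None, the method raises.
try:
    a.partition(1, axis=None)
except TypeError as exc:
    print(exc)  # 'NoneType' object cannot be interpreted as an integer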

def prod(
self,
142 changes: 113 additions & 29 deletions dpnp/dpnp_iface_sorting.py
@@ -39,8 +39,9 @@

"""

+from collections.abc import Sequence

import dpctl.tensor as dpt
-import numpy
+from dpctl.tensor._numpy_helper import normalize_axis_index

import dpnp
@@ -51,7 +52,6 @@
)
from .dpnp_array import dpnp_array
from .dpnp_utils import (
-    call_origin,
map_dtype_to_device,
)

@@ -147,7 +147,7 @@ def argsort(

Limitations
-----------
-    Parameters `order` is only supported with its default value.
+    Parameter `order` is only supported with its default value.
Otherwise ``NotImplementedError`` exception will be raised.
Sorting algorithms ``"quicksort"`` and ``"heapsort"`` are not supported.

@@ -201,44 +201,128 @@ def argsort(
)


-def partition(x1, kth, axis=-1, kind="introselect", order=None):
+def partition(a, kth, axis=-1, kind="introselect", order=None):
"""
Return a partitioned copy of an array.

For full documentation refer to :obj:`numpy.partition`.

Parameters
----------
a : {dpnp.ndarray, usm_ndarray}
Array to be sorted.
kth : {int, sequence of ints}
Element index to partition by. The k-th value of the element will be in
its final sorted position and all smaller elements will be moved before
it and all equal or greater elements behind it. The order of all
elements in the partitions is undefined. If provided with a sequence of
k-th it will partition all elements indexed by k-th of them into their
sorted position at once.
axis : {None, int}, optional
Axis along which to sort. If ``None``, the array is flattened before
sorting. The default is ``-1``, which sorts along the last axis.

Default: ``-1``.

Returns
-------
out : dpnp.ndarray
Array of the same type and shape as `a`.

Limitations
-----------
Input array is supported as :obj:`dpnp.ndarray`.
Input `kth` is supported as :obj:`int`.
Parameters `axis`, `kind` and `order` are supported only with default
values.
Parameters `kind` and `order` are only supported with its default value.
Otherwise ``NotImplementedError`` exception will be raised.

See Also
--------
:obj:`dpnp.ndarray.partition` : Equivalent method.
:obj:`dpnp.argpartition` : Indirect partition.
:obj:`dpnp.sort` : Full sorting.

Examples
--------
>>> import dpnp as np
>>> a = np.array([7, 1, 7, 7, 1, 5, 7, 2, 3, 2, 6, 2, 3, 0])
>>> p = np.partition(a, 4)
>>> p
array([0, 1, 1, 2, 2, 2, 3, 3, 5, 7, 7, 7, 7, 6]) # may vary

``p[4]`` is 2; all elements in ``p[:4]`` are less than or equal to
``p[4]``, and all elements in ``p[5:]`` are greater than or equal to
``p[4]``. The partition is::

[0, 1, 1, 2], [2], [2, 3, 3, 5, 7, 7, 7, 7, 6]

The next example shows the use of multiple values passed to `kth`.

>>> p2 = np.partition(a, (4, 8))
>>> p2
array([0, 1, 1, 2, 2, 2, 3, 3, 5, 6, 7, 7, 7, 7])

``p2[4]`` is 2 and ``p2[8]`` is 5. All elements in ``p2[:4]`` are less
than or equal to ``p2[4]``, all elements in ``p2[5:8]`` are greater than or
equal to ``p2[4]`` and less than or equal to ``p2[8]``, and all elements in
``p2[9:]`` are greater than or equal to ``p2[8]``. The partition is::

[0, 1, 1, 2], [2], [2, 3, 3], [5], [6, 7, 7, 7, 7]

"""

x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
if x1_desc:
if dpnp.is_cuda_backend(x1_desc.get_array()): # pragma: no cover
raise NotImplementedError(
"Running on CUDA is currently not supported"
)
dpnp.check_supported_arrays_type(a)

if not isinstance(kth, int):
pass
elif x1_desc.ndim == 0:
pass
elif kth >= x1_desc.shape[x1_desc.ndim - 1] or x1_desc.ndim + kth < 0:
pass
elif axis != -1:
pass
elif kind != "introselect":
pass
elif order is not None:
pass
else:
return dpnp_partition(x1_desc, kth, axis, kind, order).get_pyobj()
if kind != "introselect":
raise NotImplementedError(
"`kind` keyword argument is only supported with its default value."
)
if order is not None:
raise NotImplementedError(
"`order` keyword argument is only supported with its default value."
)

-    return call_origin(numpy.partition, x1, kth, axis, kind, order)
+    if axis is None:
+        a = dpnp.ravel(a)
+        axis = -1
+
+    nd = a.ndim
+    axis = normalize_axis_index(axis, nd)
+    length = a.shape[axis]
+
+    if isinstance(kth, int):
Review comment (Contributor):

It looks too strict because it does not support integer-like scalars such as dpnp.int64.
In other dpnp functions this type is accepted for axes:

# axis : {None, int, tuple}, optional
In [22]: dpnp.count_nonzero(a, axis=dpnp.int64(0))
Out[22]: array([1, 1, 2, 1])
# axis : {None, int, tuple of ints}, optional
np.all(x, axis=np.int64(0))
Out[25]: array([ True, False])

Reply (Contributor Author):

But axes are validated by the special function normalize_axis_tuple from NumPy.

Reply (Contributor):

I meant the kth argument:

dpnp.partition(a, dpnp.int64(4), axis=None)
TypeError: kth must be int or sequence of ints, but got <class 'numpy.int64'>

Should we use operator.index for this, as implemented in dpnp.tri for k?

Reply (Contributor Author, @antonwolfy, Feb 16, 2026):

We could, but it's not mandatory, because we explicitly state the support of int and sequence of ints in the documentation.
We can implement a general improvement and add a helper function, like normalize_int_sequence, but I'd prefer to have that done separately as a general improvement.
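As a purely illustrative aside following this thread (not part of the PR), such a helper could rely on operator.index, which accepts any integer-like object implementing __index__, including numpy.int64; the name normalize_int_sequence below is hypothetical and does not exist in dpnp.

import operator
from collections.abc import Sequence

import numpy


def normalize_int_sequence(kth):
    # Hypothetical helper sketched from the review discussion above;
    # dpnp does not currently provide this function.
    # operator.index() converts anything implementing __index__ (plain int,
    # numpy.int64, ...) to a built-in int and rejects floats and strings.
    if isinstance(kth, Sequence) and not isinstance(kth, (str, bytes)):
        return tuple(operator.index(k) for k in kth)
    return (operator.index(kth),)


print(normalize_int_sequence(4))                     # (4,)
print(normalize_int_sequence((1, numpy.int64(3))))   # (1, 3)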

+        kth = (kth,)
+    elif not isinstance(kth, Sequence):
+        raise TypeError(
+            f"kth must be int or sequence of ints, but got {type(kth)}"
+        )
+    elif not all(isinstance(k, int) for k in kth):
+        raise TypeError("kth is a sequence, but not all elements are integers")
+
+    nkth = len(kth)
+    if nkth == 0 or a.size == 0:
+        return dpnp.copy(a)
+
+    # validate kth
+    kth = list(kth)
+    for i in range(nkth):
+        if kth[i] < 0:
+            kth[i] += length
+
+        if not 0 <= kth[i] < length:
+            raise ValueError(f"kth(={kth[i]}) out of bounds {length}")
+
+    dt = a.dtype
+    if (
+        nd > 1
+        or nkth > 1
+        or dpnp.issubdtype(dt, dpnp.unsignedinteger)
+        or dt in (dpnp.int8, dpnp.int16)
+        or dpnp.is_cuda_backend(a.get_array())
+    ):
+        # sort is a faster path in case of ndim > 1
+        return dpnp.sort(a, axis=axis)
+
+    desc = dpnp.get_dpnp_descriptor(a, copy_when_nondefault_queue=False)
+    return dpnp_partition(desc, kth[0], axis, kind, order).get_pyobj()
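A short sketch of the dispatch visible in the new function above (again assuming a working dpnp installation with a SYCL device): a 1-D array with a single integer kth goes through the dedicated partition kernel, while higher-dimensional input, multiple kth values, or unsigned/8-/16-bit integer data fall back to a full dpnp.sort along the axis, which still satisfies the partition guarantee for any kth.

import dpnp as np

a1 = np.array([7, 1, 5, 3, 9, 0, 4, 2])
# 1-D input, single int kth: handled by the dpnp_partition kernel path.
p1 = np.partition(a1, 3)
print(int(p1[3]))  # 3, the 4th-smallest value of a1

a2 = np.array([[7, 1, 5, 3], [9, 0, 4, 2]])
# ndim > 1 (likewise a tuple of kth or uint/int8/int16 dtypes): the code
# above returns dpnp.sort(a, axis=axis), so each row comes back fully
# sorted, which is a valid partition for kth=1 as well.
print(np.partition(a2, 1, axis=1))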


def sort(a, axis=-1, kind=None, order=None, *, descending=False, stable=None):