Skip to content

[CLN] More Misc Cleanups in _libs #22287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 9 commits on Aug 20, 2018
7 changes: 5 additions & 2 deletions pandas/_libs/algos.pxd
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
from util cimport numeric
from numpy cimport float64_t, double_t


cpdef numeric kth_smallest(numeric[:] a, Py_ssize_t k) nogil


cdef inline Py_ssize_t swap(numeric *a, numeric *b) nogil:
cdef numeric t
cdef:
numeric t

# cython doesn't allow pointer dereference so use array syntax
t = a[0]
a[0] = b[0]
b[0] = t
return 0


cdef enum TiebreakEnumType:
TIEBREAK_AVERAGE
TIEBREAK_MIN,
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ tiebreakers = {
}


cdef inline are_diff(object left, object right):
cdef inline bint are_diff(object left, object right):
try:
return fabs(left - right) > FP_ERR
except TypeError:
Expand Down
59 changes: 33 additions & 26 deletions pandas/_libs/algos_common_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ cpdef map_indices_{{name}}(ndarray[{{c_type}}] index):

@cython.boundscheck(False)
@cython.wraparound(False)
def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
limit=None):
cdef Py_ssize_t i, j, nleft, nright
cdef ndarray[int64_t, ndim=1] indexer
cdef {{c_type}} cur, next
cdef int lim, fill_count = 0
def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new, limit=None):
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
{{c_type}} cur, next
int lim, fill_count = 0

nleft = len(old)
nright = len(new)
Expand Down Expand Up @@ -135,9 +135,10 @@ def pad_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
cdef Py_ssize_t i, N
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, N
{{c_type}} val
int lim, fill_count = 0

N = len(values)

Expand Down Expand Up @@ -171,9 +172,10 @@ def pad_inplace_{{name}}(ndarray[{{c_type}}] values,
def pad_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
cdef Py_ssize_t i, j, N, K
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, j, N, K
{{c_type}} val
int lim, fill_count = 0

K, N = (<object> values).shape

Expand Down Expand Up @@ -233,10 +235,11 @@ D
@cython.wraparound(False)
def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
limit=None):
cdef Py_ssize_t i, j, nleft, nright
cdef ndarray[int64_t, ndim=1] indexer
cdef {{c_type}} cur, prev
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, j, nleft, nright
ndarray[int64_t, ndim=1] indexer
{{c_type}} cur, prev
int lim, fill_count = 0

nleft = len(old)
nright = len(new)
Expand Down Expand Up @@ -299,9 +302,10 @@ def backfill_{{name}}(ndarray[{{c_type}}] old, ndarray[{{c_type}}] new,
def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
ndarray[uint8_t, cast=True] mask,
limit=None):
cdef Py_ssize_t i, N
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, N
{{c_type}} val
int lim, fill_count = 0

N = len(values)

Expand Down Expand Up @@ -335,9 +339,10 @@ def backfill_inplace_{{name}}(ndarray[{{c_type}}] values,
def backfill_2d_inplace_{{name}}(ndarray[{{c_type}}, ndim=2] values,
ndarray[uint8_t, ndim=2] mask,
limit=None):
cdef Py_ssize_t i, j, N, K
cdef {{c_type}} val
cdef int lim, fill_count = 0
cdef:
Py_ssize_t i, j, N, K
{{c_type}} val
int lim, fill_count = 0

K, N = (<object> values).shape

Expand Down Expand Up @@ -428,10 +433,10 @@ def is_monotonic_{{name}}(ndarray[{{c_type}}] arr, bint timelike):
@cython.wraparound(False)
@cython.boundscheck(False)
def arrmap_{{name}}(ndarray[{{c_type}}] index, object func):
cdef Py_ssize_t length = index.shape[0]
cdef Py_ssize_t i = 0

cdef ndarray[object] result = np.empty(length, dtype=np.object_)
cdef:
Py_ssize_t length = index.shape[0]
Py_ssize_t i = 0
ndarray[object] result = np.empty(length, dtype=np.object_)

from pandas._libs.lib import maybe_convert_objects

Expand Down Expand Up @@ -535,6 +540,7 @@ def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,

cdef int PLATFORM_INT = (<ndarray> np.arange(0, dtype=np.intp)).descr.type_num


cpdef ensure_platform_int(object arr):
# GH3033, GH1392
# platform int is the size of the int pointer, e.g. np.intp
Expand All @@ -546,6 +552,7 @@ cpdef ensure_platform_int(object arr):
else:
return np.array(arr, dtype=np.intp)


cpdef ensure_object(object arr):
if util.is_array(arr):
if (<ndarray> arr).descr.type_num == NPY_OBJECT:
Expand Down
9 changes: 5 additions & 4 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ cdef double nan = NaN


cdef inline float64_t median_linear(float64_t* a, int n) nogil:
cdef int i, j, na_count = 0
cdef float64_t result
cdef float64_t* tmp
cdef:
int i, j, na_count = 0
float64_t result
float64_t* tmp

if n == 0:
return NaN
Expand Down Expand Up @@ -319,7 +320,7 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,

# If we move to the next group, reset
# the fill_idx and counter
if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]:
curr_fill_idx = -1
filled_vals = 0

Expand Down
16 changes: 8 additions & 8 deletions pandas/_libs/hashing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
k = <bytes>key.encode(encoding)
kb = <uint8_t *>k
if len(k) != 16:
raise ValueError(
'key should be a 16-byte string encoded, got {!r} (len {})'.format(
k, len(k)))
raise ValueError("key should be a 16-byte string encoded, "
"got {key} (len {klen})".format(key=k, klen=len(k)))

n = len(arr)

Expand All @@ -70,8 +69,9 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'):
data = <bytes>str(val).encode(encoding)

else:
raise TypeError("{} of type {} is not a valid type for hashing, "
"must be string or null".format(val, type(val)))
raise TypeError("{val} of type {typ} is not a valid type "
"for hashing, must be string or null"
.format(val=val, typ=type(val)))

l = len(data)
lens[i] = l
Expand Down Expand Up @@ -134,9 +134,9 @@ cdef inline void _sipround(uint64_t* v0, uint64_t* v1,

cpdef uint64_t siphash(bytes data, bytes key) except? 0:
if len(key) != 16:
raise ValueError(
'key should be a 16-byte bytestring, got {!r} (len {})'.format(
key, len(key)))
raise ValueError("key should be a 16-byte bytestring, "
"got {key} (len {klen})"
.format(key=key, klen=len(key)))
return low_level_siphash(data, len(data), key)


Expand Down
14 changes: 10 additions & 4 deletions pandas/_libs/hashtable_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ cdef class {{name}}Vector:

if needs_resize(self.data):
if self.external_view_exists:
raise ValueError("external reference but Vector.resize() needed")
raise ValueError("external reference but "
"Vector.resize() needed")
self.resize()

append_data_{{dtype}}(self.data, x)
Expand Down Expand Up @@ -194,6 +195,7 @@ cdef class StringVector:
for i in range(len(x)):
self.append(x[i])


cdef class ObjectVector:

cdef:
Expand All @@ -215,7 +217,8 @@ cdef class ObjectVector:
cdef inline append(self, object o):
if self.n == self.m:
if self.external_view_exists:
raise ValueError("external reference but Vector.resize() needed")
raise ValueError("external reference but "
"Vector.resize() needed")
self.m = max(self.m * 2, _INIT_VEC_CAP)
self.ao.resize(self.m, refcheck=False)
self.data = <PyObject**> self.ao.data
Expand Down Expand Up @@ -405,8 +408,9 @@ cdef class {{name}}HashTable(HashTable):
if needs_resize(ud):
with gil:
if uniques.external_view_exists:
raise ValueError("external reference to uniques held, "
"but Vector.resize() needed")
raise ValueError("external reference to "
"uniques held, but "
"Vector.resize() needed")
uniques.resize()
append_data_{{dtype}}(ud, val)
labels[i] = count
Expand Down Expand Up @@ -742,8 +746,10 @@ cdef class StringHashTable(HashTable):

return np.asarray(labels)


na_sentinel = object


cdef class PyObjectHashTable(HashTable):

def __init__(self, size_hint=1):
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/hashtable_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,6 @@ def ismember_{{dtype}}({{scalar}}[:] arr, {{scalar}}[:] values):
{{endfor}}



#----------------------------------------------------------------------
# Mode Computations
#----------------------------------------------------------------------
Expand Down
7 changes: 3 additions & 4 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
cimport cython
from cython cimport Py_ssize_t

from cpython cimport PyObject
from cpython.slice cimport PySlice_Check

cdef extern from "Python.h":
Expand All @@ -13,7 +12,7 @@ import numpy as np
from numpy cimport int64_t

cdef extern from "compat_helper.h":
cdef int slice_get_indices(PyObject* s, Py_ssize_t length,
cdef int slice_get_indices(object s, Py_ssize_t length,
Py_ssize_t *start, Py_ssize_t *stop,
Py_ssize_t *step,
Py_ssize_t *slicelength) except -1
Expand Down Expand Up @@ -249,7 +248,7 @@ cpdef Py_ssize_t slice_len(
if slc is None:
raise TypeError("slc must be slice")

slice_get_indices(<PyObject *>slc, objlen,
slice_get_indices(<object>slc, objlen,
&start, &stop, &step, &length)

return length
Expand All @@ -269,7 +268,7 @@ cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX):
if slc is None:
raise TypeError("slc should be a slice")

slice_get_indices(<PyObject *>slc, objlen,
slice_get_indices(<object>slc, objlen,
&start, &stop, &step, &length)

return start, stop, step, length
Expand Down
Loading