-
-
Notifications
You must be signed in to change notification settings - Fork 18.7k
API: Allow other na values in StringArray Constructor #45168
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
ba5fce1
8011f8d
36ad886
1b27993
3ddcf37
4b2ee88
4c60d0f
d7b5d4b
b13317e
a2d27ca
d5e594d
9c6e9d3
1fca524
9016c00
6ec2059
af8ece1
1fb424c
ad55cd3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -669,6 +669,40 @@ def astype_intsafe(ndarray[object] arr, cnp.dtype new_dtype) -> ndarray: | |
|
||
return result | ||
|
||
# Fused type: lets a function typed with it accept either a 1-D or a 2-D
# object-dtype ndarray.
ctypedef fused ndarr_object:
    ndarray[object, ndim=1]
    ndarray[object, ndim=2]
|
||
# TODO: get rid of this helper once StringArray is modified to go through
#  ensure_string_array instead
@cython.wraparound(False)
@cython.boundscheck(False)
def convert_nans_to_NA(ndarr_object arr) -> ndarray:
    """
    Helper for StringArray that converts null values that are not pd.NA
    (e.g. np.nan, None) to pd.NA.

    Every element that is not a ``str`` is overwritten with pd.NA, so the
    non-string elements are assumed to have already been validated as null.

    Parameters
    ----------
    arr : ndarray[object], ndim 1 or 2

    Returns
    -------
    ndarray[object]
        Same shape as ``arr``.

    Notes
    -----
    ``arr`` is already object dtype, so ``np.asarray`` is expected to return
    it without copying; the replacement is therefore also visible in ``arr``
    itself.
    """
    cdef:
        Py_ssize_t i, j, m, n
        object val
        ndarr_object result

    # No copy expected here (see Notes): ``result`` refers to the same data
    # as ``arr``.
    result = np.asarray(arr, dtype="object")

    if arr.ndim == 2:
        m, n = arr.shape[0], arr.shape[1]
        for i in range(m):
            for j in range(n):
                # Single [i, j] lookup instead of chained [i][j]: the chained
                # form builds an intermediate row object for every element.
                val = arr[i, j]
                if not isinstance(val, str):
                    result[i, j] = <object>C_NA
    else:
        n = len(arr)
        for i in range(n):
            val = arr[i]
            if not isinstance(val, str):
                result[i] = <object>C_NA
    return result
|
||
|
||
@cython.wraparound(False) | ||
@cython.boundscheck(False) | ||
|
@@ -1880,10 +1914,6 @@ cdef class StringValidator(Validator): | |
cdef inline bint is_array_typed(self) except -1:
    # The array counts as string-typed when the scalar type of its dtype is
    # np.str_ (or a subclass of it).
    scalar_type = self.dtype.type
    return issubclass(scalar_type, np.str_)
|
||
cdef bint is_valid_null(self, object value) except -1:
    # None / NaN are deliberately not accepted here: StringArray uses NA,
    # so only C_NA itself (identity check) is a valid null.
    return C_NA is value
|
||
|
||
cpdef bint is_string_array(ndarray values, bint skipna=False): | ||
cdef: | ||
|
Uh oh!
There was an error while loading. Please reload this page.