From 5ffae65f6f44a104a2061c6cdeb6475f904c4d23 Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Sun, 19 Jun 2022 20:47:53 -0400
Subject: [PATCH 01/14] ENH: lib: Allow usecols to be a callable in loadtxt().

---
 numpy/core/setup.py                           |  1 +
 .../src/multiarray/textreading/readtext.c     | 51 ++++++-------
 numpy/core/src/multiarray/textreading/rows.c  | 75 ++++++++++++++++++-
 numpy/core/src/multiarray/textreading/rows.h  |  3 +-
 .../textreading/seq_to_ssize_c_array.c        | 52 +++++++++++++
 .../textreading/seq_to_ssize_c_array.h        |  6 ++
 numpy/lib/npyio.py                            | 34 ++++++++-
 numpy/lib/tests/test_loadtxt.py               | 72 ++++++++++++++++--
 8 files changed, 250 insertions(+), 44 deletions(-)
 create mode 100644 numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.c
 create mode 100644 numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.h

diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 543b6ae39650..c4341cc91303 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -1017,6 +1017,7 @@ def opts_if_msvc(build_cmd):
             join('src', 'multiarray', 'textreading', 'stream_pyobject.c'),
             join('src', 'multiarray', 'textreading', 'str_to_int.c'),
             join('src', 'multiarray', 'textreading', 'tokenize.cpp'),
+            join('src', 'multiarray', 'textreading', 'seq_to_ssize_c_array.c'),
             ]
 
     #######################################################################
diff --git a/numpy/core/src/multiarray/textreading/readtext.c b/numpy/core/src/multiarray/textreading/readtext.c
index a5db1cb7761a..8c78cfead2a0 100644
--- a/numpy/core/src/multiarray/textreading/readtext.c
+++ b/numpy/core/src/multiarray/textreading/readtext.c
@@ -11,6 +11,7 @@
 #include "common.h"
 #include "conversion_utils.h"
 
+#include "textreading/seq_to_ssize_c_array.h"
 #include "textreading/parser_config.h"
 #include "textreading/stream_pyobject.h"
 #include "textreading/field_types.h"
@@ -19,8 +20,8 @@
 
 
 //
-// `usecols` must point to a Python object that is Py_None or a 1-d contiguous
-// numpy array with data type int32.
+// If the argument `usecols_obj` is not Py_None, it must be a callable.
+// In that case, the argument `usecols` must be NULL.
 //
 // `dtype` must point to a Python object that is Py_None or a numpy dtype
 // instance.  If the latter, code and sizes must be arrays of length
@@ -37,6 +38,7 @@
 static PyObject *
 _readtext_from_stream(stream *s,
         parser_config *pc, Py_ssize_t num_usecols, Py_ssize_t usecols[],
+        PyObject *usecols_obj,
         Py_ssize_t skiplines, Py_ssize_t max_rows,
         PyObject *converters, PyObject *dtype)
 {
@@ -69,7 +71,8 @@ _readtext_from_stream(stream *s,
 
     arr = read_rows(
             s, max_rows, num_fields, ft, pc,
-            num_usecols, usecols, skiplines, converters,
+            num_usecols, usecols, usecols_obj,
+            skiplines, converters,
             NULL, out_dtype, homogeneous);
     if (arr == NULL) {
         goto finish;
@@ -263,38 +266,25 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
      */
     Py_ssize_t num_usecols = -1;
     Py_ssize_t *usecols = NULL;
-    if (usecols_obj != Py_None) {
+    if (usecols_obj != Py_None && !PyCallable_Check(usecols_obj)) {
         num_usecols = PySequence_Length(usecols_obj);
         if (num_usecols < 0) {
             return NULL;
         }
-        /* Calloc just to not worry about overflow */
-        usecols = PyMem_Calloc(num_usecols, sizeof(Py_ssize_t));
+        usecols = seq_to_ssize_c_array(num_usecols, usecols_obj,
+                    "usecols must be an int or a sequence of ints but "
+                    "it contains at least one element of type '%s'");
         if (usecols == NULL) {
-            PyErr_NoMemory();
             return NULL;
         }
-        for (Py_ssize_t i = 0; i < num_usecols; i++) {
-            PyObject *tmp = PySequence_GetItem(usecols_obj, i);
-            if (tmp == NULL) {
-                PyMem_FREE(usecols);
-                return NULL;
-            }
-            usecols[i] = PyNumber_AsSsize_t(tmp, PyExc_OverflowError);
-            if (error_converting(usecols[i])) {
-                if (PyErr_ExceptionMatches(PyExc_TypeError)) {
-                    PyErr_Format(PyExc_TypeError,
-                            "usecols must be an int or a sequence of ints but "
-                            "it contains at least one element of type '%s'",
-                            Py_TYPE(tmp)->tp_name);
-                }
-                Py_DECREF(tmp);
-                PyMem_FREE(usecols);
-                return NULL;
-            }
-            Py_DECREF(tmp);
-        }
+        /*
+         *  The given usecols_obj is a Python sequence; it has been processed to
+         *  give the usecols array, so reset the Python object to None.
+         */
+        usecols_obj = Py_None;
     }
+    assert(usecols == NULL || usecols_obj == Py_None);
+    /* At this point, if usecols_obj is not Py_None, it must be a callable object. */
 
     stream *s;
     if (filelike) {
@@ -304,14 +294,15 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
         s = stream_python_iterable(file, encoding);
     }
     if (s == NULL) {
-        PyMem_FREE(usecols);
+        PyMem_Free(usecols);
         return NULL;
     }
 
     arr = _readtext_from_stream(
-            s, &pc, num_usecols, usecols, skiplines, max_rows, converters, dtype);
+            s, &pc, num_usecols, usecols, usecols_obj, skiplines, max_rows,
+            converters, dtype);
     stream_close(s);
-    PyMem_FREE(usecols);
+    PyMem_Free(usecols);
     return arr;
 }
 
diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index a72fb79d93cc..449d85da5c93 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <stdbool.h>
 
+#include "textreading/seq_to_ssize_c_array.h"
 #include "textreading/stream.h"
 #include "textreading/tokenize.h"
 #include "textreading/conversions.h"
@@ -134,6 +135,11 @@ create_conv_funcs(
  * @param usecols An array of length `num_usecols` or NULL.  If given indicates
  *        which column is read for each individual row (negative columns are
  *        accepted).
+ * @param usecols_obj Either `Py_None` or a callable Python object.  If
+ *        callable, the function must accept a single integer argument (the
+ *        number of columns in the file), and return a sequence of integers
+ *        (the sequence of column indices to use).  When `usecols_obj` is not
+ *        `Py_None`, `usecols` MUST be NULL.
  * @param skiplines The number of lines to skip, these lines are ignored.
  * @param converters Python dictionary of converters.  Finalizing converters
  *        is difficult without information about the number of columns.
@@ -155,7 +161,8 @@ create_conv_funcs(
 NPY_NO_EXPORT PyArrayObject *
 read_rows(stream *s,
         npy_intp max_rows, Py_ssize_t num_field_types, field_type *field_types,
-        parser_config *pconfig, Py_ssize_t num_usecols, Py_ssize_t *usecols,
+        parser_config *pconfig,
+        Py_ssize_t num_usecols, Py_ssize_t *usecols, PyObject *usecols_obj,
         Py_ssize_t skiplines, PyObject *converters,
         PyArrayObject *data_array, PyArray_Descr *out_descr,
         bool homogeneous)
@@ -179,6 +186,10 @@ read_rows(stream *s,
     /* We give a warning if max_rows is used and an empty line is encountered */
     bool give_empty_row_warning = max_rows >= 0;
 
+    // If the caller passes in a callable for usecols_obj, then they must
+    // also pass in NULL for usecols.
+    assert(usecols == NULL || usecols_obj == Py_None);
+
     int ts_result = 0;
     tokenizer_state ts;
     if (tokenizer_init(&ts, pconfig) < 0) {
@@ -246,6 +257,56 @@ read_rows(stream *s,
             // We've deferred some of the initialization tasks to here,
             // because we've now read the first line, and we definitively
             // know how many fields (i.e. columns) we will be processing.
+
+            if (usecols_obj != Py_None) {
+                // Call the Python function provided by the caller to
+                // create the usecols array.
+                PyObject *seq = PyObject_CallFunction(usecols_obj, "n",
+                                                      current_num_fields);
+                if (PyErr_Occurred()) {
+                    // User-provided function failed.
+                    goto error;
+                }
+                // The user-defined usecols function must return a sequence
+                // of integers.
+                num_usecols = PySequence_Length(seq);
+                if (num_usecols == -1) {
+                    // User-provided function did not return a sequence.
+                    PyErr_Clear();
+                    PyErr_Format(PyExc_TypeError,
+                        "the user-provided callable usecols must return a "
+                        "sequence of ints, but it returned an instance of "
+                        "type '%s'", Py_TYPE(seq)->tp_name);
+                    Py_DECREF(seq);
+                    goto error;
+                }
+
+                if (!homogeneous && num_field_types != num_usecols) {
+                    // A structured dtype was provided, and the length of
+                    // the sequence returned by the user-provided function
+                    // does not have the same length as the number of fields
+                    // in the dtype.
+                    Py_DECREF(seq);
+                    PyErr_Format(PyExc_RuntimeError,
+                        "length of the sequence returned by the callable "
+                        "usecols (%d) does not equal the number of fields "
+                        "in the given dtype (%d)",
+                        num_usecols, num_field_types);
+                    goto error;
+                }
+
+                // Convert the sequence to a C array of Py_ssize_t ints.
+                usecols = seq_to_ssize_c_array(num_usecols, seq,
+                        "the user-provided callable usecols must return a "
+                        "sequence of ints, but it returned a sequence "
+                        "containing at least one occurrence of type '%s'");
+                Py_DECREF(seq);
+                if (usecols == NULL) {
+                    goto error;
+                }
+                actual_num_fields = num_usecols;
+            }
+
             if (actual_num_fields == -1) {
                 actual_num_fields = current_num_fields;
             }
@@ -431,6 +492,12 @@ read_rows(stream *s,
         data_ptr += row_size;
     }
 
+    if (usecols_obj != Py_None) {
+        // This function owns usecols if a callable usecols_obj was given.
+        PyMem_Free(usecols);
+        usecols = NULL;  // An overabundance of caution...
+    }
+
     tokenizer_clear(&ts);
     if (conv_funcs != NULL) {
         for (Py_ssize_t i = 0; i < actual_num_fields; i++) {
@@ -479,6 +546,12 @@ read_rows(stream *s,
     return data_array;
 
   error:
+    if (usecols_obj != Py_None) {
+        // If the error occurred early enough in the function, we might
+        // not have allocated usecols yet, but that's OK, because we know
+        // that usecols is NULL in that case.
+        PyMem_Free(usecols);
+    }
     if (conv_funcs != NULL) {
         for (Py_ssize_t i = 0; i < actual_num_fields; i++) {
             Py_XDECREF(conv_funcs[i]);
diff --git a/numpy/core/src/multiarray/textreading/rows.h b/numpy/core/src/multiarray/textreading/rows.h
index 20eb9e186a19..1a707a2d3918 100644
--- a/numpy/core/src/multiarray/textreading/rows.h
+++ b/numpy/core/src/multiarray/textreading/rows.h
@@ -14,7 +14,8 @@
 NPY_NO_EXPORT PyArrayObject *
 read_rows(stream *s,
         npy_intp nrows, Py_ssize_t num_field_types, field_type *field_types,
-        parser_config *pconfig, Py_ssize_t num_usecols, Py_ssize_t *usecols,
+        parser_config *pconfig,
+        Py_ssize_t num_usecols, Py_ssize_t *usecols, PyObject *usecols_obj,
         Py_ssize_t skiplines, PyObject *converters,
         PyArrayObject *data_array, PyArray_Descr *out_descr,
         bool homogeneous);
diff --git a/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.c b/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.c
new file mode 100644
index 000000000000..67a2effef45a
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.c
@@ -0,0 +1,52 @@
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/arrayobject.h"  // For NPY_NO_EXPORT
+
+//
+// Convert a Python sequence to a C array of Py_ssize_t integers.
+//
+// `seq` must be a Python sequence of length `len`.  It is assumed that the
+// caller has already checked the length of the sequence, so the length is an
+// argument instead of being inferred from `seq` itself.
+//
+// `errtext` must be provided, and it must contain one occurrence of the
+// format code sequence '%s'.  This text is used as the text of the TypeError
+// that is raised when an element of `seq` is found that cannot be converted
+// to an a Py_ssize_t integer.  The '%s' format code will be replaced with
+// the type of the object that failed the conversion.
+//
+// Returns NULL with an exception set if the conversion fails.
+//
+// The memory for the array is allocated with PyMem_Calloc.
+// The caller must free the memory with PyMem_FREE or PyMem_Free.
+//
+NPY_NO_EXPORT Py_ssize_t *
+seq_to_ssize_c_array(Py_ssize_t len, PyObject *seq, char *errtext)
+{
+    Py_ssize_t *arr = PyMem_Calloc(len, sizeof(Py_ssize_t));
+    if (arr == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    for (Py_ssize_t i = 0; i < len; ++i) {
+        PyObject *tmp = PySequence_GetItem(seq, i);
+        if (tmp == NULL) {
+            PyMem_Free(arr);
+            return NULL;
+        }
+        arr[i] = PyNumber_AsSsize_t(tmp, PyExc_OverflowError);
+        if (arr[i] == -1 && PyErr_Occurred()) {
+            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+                PyErr_Format(PyExc_TypeError, errtext, Py_TYPE(tmp)->tp_name);
+            }
+            Py_DECREF(tmp);
+            PyMem_Free(arr);
+            return NULL;
+        }
+        Py_DECREF(tmp);
+    }
+    return arr;
+}
diff --git a/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.h b/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.h
new file mode 100644
index 000000000000..a58d7a01a93c
--- /dev/null
+++ b/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.h
@@ -0,0 +1,6 @@
+#ifndef SEQ_TO_SSIZE_C_ARRAY_H
+#define SEQ_TO_SSIZE_C_ARRAY_H
+
+Py_ssize_t *seq_to_ssize_c_array(Py_ssize_t len, PyObject *seq, char *initialtext);
+
+#endif
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 210c0ea94a7e..f2240cbd5a11 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -888,9 +888,10 @@ def _read(fname, *, delimiter=',', comment='#', quote='"',
         read_dtype_via_object_chunks = dtype
         dtype = np.dtype(object)
 
-    if usecols is not None:
-        # Allow usecols to be a single int or a sequence of ints, the C-code
-        # handles the rest
+    if usecols is not None and not callable(usecols):
+        # If usecols is not callable, it must be an int or a sequence of ints.
+        # Process usecols so that when it is not a callable, it is a list; the
+        # C code will handle the rest of the validation.
         try:
             usecols = list(usecols)
         except TypeError:
@@ -1099,15 +1100,24 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         Default: None.
     skiprows : int, optional
         Skip the first `skiprows` lines, including comments; default: 0.
-    usecols : int or sequence, optional
+    usecols : int, sequence of ints, or callable, optional
         Which columns to read, with 0 being the first. For example,
         ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
         The default, None, results in all columns being read.
 
+        `usecols` may be a callable function that accepts a single integer
+        argument that gives the number of columns in the file. The callable
+        must return a sequence of integers that indicate which columns to
+        include in the output array.
+
         .. versionchanged:: 1.11.0
             When a single column has to be read it is possible to use
             an integer instead of a tuple. E.g ``usecols = 3`` reads the
             fourth column the same way as ``usecols = (3,)`` would.
+
+        .. versionchanged:: 1.24.0
+            Added the option for `usecols` to be a callable.
+
     unpack : bool, optional
         If True, the returned array is transposed, so that arguments may be
         unpacked using ``x, y, z = loadtxt(...)``.  When used with a
@@ -1272,6 +1282,22 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     >>> np.loadtxt(s, dtype="U", delimiter=",", quotechar='"')
     array('Hello, my name is "Monty"!', dtype='<U26')
 
+    The parameter `usecols` can be a callable function.  This example shows
+    how `usecols` can be used to skip the second column of the input file.
+
+    >>> s1 = StringIO('10.25 ABC 1.5 3.5\n12.50 XYZ 2.5 8.0')
+    >>> np.loadtxt(s1, usecols=lambda n: [0] + list(range(2, n)))
+    array([[10.25,  1.5 ,  3.5 ],
+           [12.5 ,  2.5 ,  8.  ]])
+
+    The caller does not have to know the number of columns in advance.
+    The same example works with this input, which has more columns.
+
+    >>> s2 = StringIO('10.25 ABC 1.5 3.5 5.5\n12.50 XYZ 2.5 8.0 9.5')
+    >>> np.loadtxt(s2, usecols=lambda n: [0] + list(range(2, n)))
+    array([[10.25,  1.5 ,  3.5 ,  5.5 ],
+           [12.5 ,  2.5 ,  8.  ,  9.5 ]])
+
     """
 
     if like is not None:
diff --git a/numpy/lib/tests/test_loadtxt.py b/numpy/lib/tests/test_loadtxt.py
index 0b8fe3c479c6..042f8a44ef6c 100644
--- a/numpy/lib/tests/test_loadtxt.py
+++ b/numpy/lib/tests/test_loadtxt.py
@@ -225,7 +225,8 @@ def test_converters_negative_indices():
     assert_equal(res, expected)
 
 
-def test_converters_negative_indices_with_usecols():
+@pytest.mark.parametrize('usecols', [[0, -1], lambda n: [0, -1]])
+def test_converters_negative_indices_with_usecols(usecols):
     txt = StringIO('1.5,2.5,3.5\n3.0,4.0,XXX\n5.5,6.0,7.5\n')
     conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)}
     expected = np.array([[1.5, 3.5], [3.0, np.nan], [5.5, 7.5]])
@@ -234,37 +235,88 @@ def test_converters_negative_indices_with_usecols():
         dtype=np.float64,
         delimiter=",",
         converters=conv,
-        usecols=[0, -1],
+        usecols=usecols,
         encoding=None,
     )
     assert_equal(res, expected)
 
     # Second test with variable number of rows:
     res = np.loadtxt(StringIO('''0,1,2\n0,1,2,3,4'''), delimiter=",",
-                     usecols=[0, -1], converters={-1: (lambda x: -1)})
+                     usecols=usecols, converters={-1: (lambda x: -1)})
     assert_array_equal(res, [[0, -1], [0, -1]])
 
-def test_ragged_usecols():
+
+@pytest.mark.parametrize('usecols', [[0, -2], lambda n: [0, -2]])
+def test_ragged_usecols(usecols):
     # usecols, and negative ones, work even with varying number of columns.
     txt = StringIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n")
     expected = np.array([[0, 0], [0, 0], [0, 0]])
-    res = np.loadtxt(txt, dtype=float, delimiter=",", usecols=[0, -2])
+    res = np.loadtxt(txt, dtype=float, delimiter=",", usecols=usecols)
     assert_equal(res, expected)
 
     txt = StringIO("0,0,XXX\n0\n0,XXX,XXX,0,XXX\n")
     with pytest.raises(ValueError,
                 match="invalid column index -2 at row 2 with 1 columns"):
         # There is no -2 column in the second row:
-        np.loadtxt(txt, dtype=float, delimiter=",", usecols=[0, -2])
+        np.loadtxt(txt, dtype=float, delimiter=",", usecols=usecols)
 
 
-def test_empty_usecols():
+@pytest.mark.parametrize('usecols', [[], lambda n: []])
+def test_empty_usecols(usecols):
     txt = StringIO("0,0,XXX\n0,XXX,0,XXX\n0,XXX,XXX,0,XXX\n")
-    res = np.loadtxt(txt, dtype=np.dtype([]), delimiter=",", usecols=[])
+    res = np.loadtxt(txt, dtype=np.dtype([]), delimiter=",", usecols=usecols)
     assert res.shape == (3,)
     assert res.dtype == np.dtype([])
 
 
+def test_callable_usecols_basic():
+    txt = StringIO('1 2 3 4 5\n6 7 8 9 1\n2 3 4 5 6\n')
+    res = np.loadtxt(txt, dtype=int, usecols=lambda n: list(range(n)[::2]))
+    expected = np.array([[1, 3, 5], [6, 8, 1], [2, 4, 6]])
+    assert_equal(res, expected)
+
+
+def test_callable_usecols_method():
+
+    class Foo:
+
+        def usecols(self, n):
+            return list(range(n)[1::2])
+
+        def load(self):
+            return np.loadtxt(StringIO('1 2 3 4\n5 6 7 8'),
+                              usecols=self.usecols)
+
+    res = Foo().load()
+    expected = np.array([[2.0, 4.0], [6.0, 8.0]])
+    assert_equal(res, expected)
+
+
+def test_callable_usecols_raises_exception():
+    with pytest.raises(ZeroDivisionError, match='division by zero'):
+        np.loadtxt(StringIO('1 2\n4 5'), usecols=lambda n: 1/0)
+
+
+def test_callable_usecols_not_a_sequence():
+    with pytest.raises(TypeError,
+                       match="returned an instance of type 'module'"):
+        np.loadtxt(StringIO('1 2\n4 5'), usecols=lambda n: np)
+
+
+def test_callable_usecols_bad_element_type():
+    with pytest.raises(TypeError,
+                       match="at least one occurrence of type 'module'"):
+        np.loadtxt(StringIO('1 2\n3 4'), usecols=lambda n: [np])
+
+
+def test_callable_usecols_bad_len_with_structured_dtype():
+    dt = np.dtype([('x', float), ('y', float)])
+    txt = StringIO('1 2 3 4\n5 6 7 8\n')
+    with pytest.raises(RuntimeError,
+                       match='the number of fields in the given dtype'):
+        np.loadtxt(txt, usecols=lambda n: [0, 2, 3], dtype=dt)
+
+
 @pytest.mark.parametrize("c1", ["a", "の", "🫕"])
 @pytest.mark.parametrize("c2", ["a", "の", "🫕"])
 def test_large_unicode_characters(c1, c2):
@@ -415,6 +467,7 @@ def test_implicit_cast_float_to_int_fails(dtype):
     with pytest.raises(ValueError):
         np.loadtxt(txt, dtype=dtype, delimiter=",")
 
+
 @pytest.mark.parametrize("dtype", (np.complex64, np.complex128))
 @pytest.mark.parametrize("with_parens", (False, True))
 def test_complex_parsing(dtype, with_parens):
@@ -610,6 +663,7 @@ def test_quoted_field_is_not_empty():
     res = np.loadtxt(txt, delimiter=",", dtype="U1", quotechar='"')
     assert_equal(res, expected)
 
+
 def test_quoted_field_is_not_empty_nonstrict():
     # Same as test_quoted_field_is_not_empty but check that we are not strict
     # about missing closing quote (this is the `csv.reader` default also)
@@ -618,6 +672,7 @@ def test_quoted_field_is_not_empty_nonstrict():
     res = np.loadtxt(txt, delimiter=",", dtype="U1", quotechar='"')
     assert_equal(res, expected)
 
+
 def test_consecutive_quotechar_escaped():
     txt = StringIO('"Hello, my name is ""Monty""!"')
     expected = np.array('Hello, my name is "Monty"!', dtype="U40")
@@ -649,6 +704,7 @@ def test_warn_on_no_data(data, ndmin, usecols):
             res = np.loadtxt(txt, ndmin=ndmin, usecols=usecols)
         assert res.shape == expected_shape
 
+
 @pytest.mark.parametrize("skiprows", (2, 3))
 def test_warn_on_skipped_data(skiprows):
     data = "1 2 3\n4 5 6"

From 10f1d08920586deb191856dde68443336deed3a1 Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Tue, 21 Jun 2022 10:27:26 -0400
Subject: [PATCH 02/14] Fix check of the result of PyObject_CallFunction()

---
 numpy/core/src/multiarray/textreading/rows.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index 449d85da5c93..a75c3f8aeacd 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -263,7 +263,7 @@ read_rows(stream *s,
                 // create the usecols array.
                 PyObject *seq = PyObject_CallFunction(usecols_obj, "n",
                                                       current_num_fields);
-                if (PyErr_Occurred()) {
+                if (seq == NULL) {
                     // User-provided function failed.
                     goto error;
                 }

From 0d8d3c55c80952ba45d671ee27888e48db1f1bea Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Thu, 23 Jun 2022 14:11:28 -0400
Subject: [PATCH 03/14] Lots of updates. (Squash this commit!)

---
 numpy/core/setup.py                           |   1 -
 numpy/core/src/multiarray/conversion_utils.c  |  63 ++++
 numpy/core/src/multiarray/conversion_utils.h  |   5 +
 .../src/multiarray/textreading/readtext.c     | 107 +------
 numpy/core/src/multiarray/textreading/rows.c  | 299 +++++++++++-------
 numpy/core/src/multiarray/textreading/rows.h  |  10 +-
 .../textreading/seq_to_ssize_c_array.c        |  52 ---
 .../textreading/seq_to_ssize_c_array.h        |   6 -
 .../src/multiarray/textreading/tokenize.cpp   |   2 +
 numpy/lib/tests/test_io.py                    |   4 +-
 numpy/lib/tests/test_loadtxt.py               |  44 +++
 11 files changed, 314 insertions(+), 279 deletions(-)
 delete mode 100644 numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.c
 delete mode 100644 numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.h

diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index c4341cc91303..543b6ae39650 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -1017,7 +1017,6 @@ def opts_if_msvc(build_cmd):
             join('src', 'multiarray', 'textreading', 'stream_pyobject.c'),
             join('src', 'multiarray', 'textreading', 'str_to_int.c'),
             join('src', 'multiarray', 'textreading', 'tokenize.cpp'),
-            join('src', 'multiarray', 'textreading', 'seq_to_ssize_c_array.c'),
             ]
 
     #######################################################################
diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index 96afb6c00ce2..bb4a14b6d7d3 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -1147,6 +1147,69 @@ PyArray_IntpFromSequence(PyObject *seq, npy_intp *vals, int maxvals)
 }
 
 
+//
+// Convert a Python sequence to a C array of Py_ssize_t integers.
+//
+// `seq` must be a Python sequence.
+//
+// `seq_typeerror_text` and `element_typeerror_text` must be provided, and
+// each must contain one occurrence of the format code sequence '%s'.  The
+// '%s' format code will be replaced with the type of the object that failed
+// the conversion.
+// * `seq_typeerror_text` is used if the attempt to get the length of `seq`
+//   fails.
+// * `element_typerror_text` is used if the attempt to convert an element
+//   of `seq` to a Py_ssize_t fails.
+//
+// On success, the function assigns the allocated memory to *parr, and
+// returns the length of the sequence.
+//
+// The memory for the array is allocated with PyMem_Calloc.
+// The caller must free the memory with PyMem_FREE or PyMem_Free.
+//
+// Returns -1 with an exception set and with *parr set to NULL if the
+// conversion fails.
+//
+NPY_NO_EXPORT Py_ssize_t
+PyArray_SeqToSsizeCArray(PyObject *seq, Py_ssize_t **parr,
+                         char *seq_typeerror_text,
+                         char *element_typeerror_text)
+{
+    *parr = NULL;
+    Py_ssize_t len = PySequence_Length(seq);
+    if (len == -1) {
+        PyErr_Format(PyExc_TypeError, seq_typeerror_text,
+                     Py_TYPE(seq)->tp_name);
+        return -1;
+    }
+    Py_ssize_t *arr = PyMem_Calloc(len, sizeof(Py_ssize_t));
+    if (arr == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    for (Py_ssize_t i = 0; i < len; ++i) {
+        PyObject *tmp = PySequence_GetItem(seq, i);
+        if (tmp == NULL) {
+            PyMem_Free(arr);
+            return -1;
+        }
+        arr[i] = PyNumber_AsSsize_t(tmp, PyExc_OverflowError);
+        if (error_converting(arr[i])) {
+            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
+                PyErr_Format(PyExc_TypeError, element_typeerror_text,
+                             Py_TYPE(tmp)->tp_name);
+            }
+            Py_DECREF(tmp);
+            PyMem_Free(arr);
+            return -1;
+        }
+        Py_DECREF(tmp);
+    }
+    *parr = arr;
+    return len;
+}
+
+
 /**
  * WARNING: This flag is a bad idea, but was the only way to both
  *   1) Support unpickling legacy pickles with object types.
diff --git a/numpy/core/src/multiarray/conversion_utils.h b/numpy/core/src/multiarray/conversion_utils.h
index 4d0fbb8941ba..4c27ca31a052 100644
--- a/numpy/core/src/multiarray/conversion_utils.h
+++ b/numpy/core/src/multiarray/conversion_utils.h
@@ -48,6 +48,11 @@ PyArray_IntpFromIndexSequence(PyObject *seq, npy_intp *vals, npy_intp maxvals);
 NPY_NO_EXPORT int
 PyArray_IntpFromSequence(PyObject *seq, npy_intp *vals, int maxvals);
 
+NPY_NO_EXPORT Py_ssize_t
+PyArray_SeqToSsizeCArray(PyObject *seq, Py_ssize_t **parr,
+                         char *seq_typeerror_text,
+                         char *element_typeerror_text);
+
 NPY_NO_EXPORT int
 PyArray_TypestrConvert(int itemsize, int gentype);
 
diff --git a/numpy/core/src/multiarray/textreading/readtext.c b/numpy/core/src/multiarray/textreading/readtext.c
index 8c78cfead2a0..0d61bac1324d 100644
--- a/numpy/core/src/multiarray/textreading/readtext.c
+++ b/numpy/core/src/multiarray/textreading/readtext.c
@@ -11,78 +11,9 @@
 #include "common.h"
 #include "conversion_utils.h"
 
-#include "textreading/seq_to_ssize_c_array.h"
 #include "textreading/parser_config.h"
 #include "textreading/stream_pyobject.h"
-#include "textreading/field_types.h"
 #include "textreading/rows.h"
-#include "textreading/str_to_int.h"
-
-
-//
-// If the argument `usecols_obj` is not Py_None, it must be a callable.
-// In that case, the argument `usecols` must be NULL.
-//
-// `dtype` must point to a Python object that is Py_None or a numpy dtype
-// instance.  If the latter, code and sizes must be arrays of length
-// num_dtype_fields, holding the flattened data field type codes and byte
-// sizes. (num_dtype_fields, codes, and sizes can be inferred from dtype,
-// but we do that in Python code.)
-//
-// If both `usecols` and `dtype` are not None, and the data type is compound,
-// then len(usecols) must equal num_dtype_fields.
-//
-// If `dtype` is given and it is compound, and `usecols` is None, then the
-// number of columns in the file must match the number of fields in `dtype`.
-//
-static PyObject *
-_readtext_from_stream(stream *s,
-        parser_config *pc, Py_ssize_t num_usecols, Py_ssize_t usecols[],
-        PyObject *usecols_obj,
-        Py_ssize_t skiplines, Py_ssize_t max_rows,
-        PyObject *converters, PyObject *dtype)
-{
-    PyArrayObject *arr = NULL;
-    PyArray_Descr *out_dtype = NULL;
-    field_type *ft = NULL;
-
-    /*
-     * If dtypes[0] is dtype the input was not structured and the result
-     * is considered "homogeneous" and we have to discover the number of
-     * columns/
-     */
-    out_dtype = (PyArray_Descr *)dtype;
-    Py_INCREF(out_dtype);
-
-    Py_ssize_t num_fields = field_types_create(out_dtype, &ft);
-    if (num_fields < 0) {
-        goto finish;
-    }
-    bool homogeneous = num_fields == 1 && ft[0].descr == out_dtype;
-
-    if (!homogeneous && usecols != NULL && num_usecols != num_fields) {
-        PyErr_Format(PyExc_TypeError,
-                "If a structured dtype is used, the number of columns in "
-                "`usecols` must match the effective number of fields. "
-                "But %zd usecols were given and the number of fields is %zd.",
-                num_usecols, num_fields);
-        goto finish;
-    }
-
-    arr = read_rows(
-            s, max_rows, num_fields, ft, pc,
-            num_usecols, usecols, usecols_obj,
-            skiplines, converters,
-            NULL, out_dtype, homogeneous);
-    if (arr == NULL) {
-        goto finish;
-    }
-
-  finish:
-    Py_XDECREF(out_dtype);
-    field_types_xclear(num_fields, ft);
-    return (PyObject *)arr;
-}
 
 
 static int
@@ -208,8 +139,6 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
     };
     bool filelike = true;
 
-    PyObject *arr = NULL;
-
     NPY_PREPARE_ARGPARSER;
     if (npy_parse_arguments("_load_from_filelike", args, len_args, kwnames,
             "file", NULL, &file,
@@ -260,32 +189,6 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
         }
     }
 
-    /*
-     * Parse usecols, the rest of NumPy has no clear helper for this, so do
-     * it here manually.
-     */
-    Py_ssize_t num_usecols = -1;
-    Py_ssize_t *usecols = NULL;
-    if (usecols_obj != Py_None && !PyCallable_Check(usecols_obj)) {
-        num_usecols = PySequence_Length(usecols_obj);
-        if (num_usecols < 0) {
-            return NULL;
-        }
-        usecols = seq_to_ssize_c_array(num_usecols, usecols_obj,
-                    "usecols must be an int or a sequence of ints but "
-                    "it contains at least one element of type '%s'");
-        if (usecols == NULL) {
-            return NULL;
-        }
-        /*
-         *  The given usecols_obj is a Python sequence; it has been processed to
-         *  give the usecols array, so reset the Python object to None.
-         */
-        usecols_obj = Py_None;
-    }
-    assert(usecols == NULL || usecols_obj == Py_None);
-    /* At this point, if usecols_obj is not Py_None, it must be a callable object. */
-
     stream *s;
     if (filelike) {
         s = stream_python_file(file, encoding);
@@ -294,15 +197,13 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
         s = stream_python_iterable(file, encoding);
     }
     if (s == NULL) {
-        PyMem_Free(usecols);
         return NULL;
     }
 
-    arr = _readtext_from_stream(
-            s, &pc, num_usecols, usecols, usecols_obj, skiplines, max_rows,
-            converters, dtype);
+    PyArrayObject *arr = read_rows(s, max_rows, &pc, usecols_obj, skiplines,
+                                   converters, NULL, dtype);
+
     stream_close(s);
-    PyMem_Free(usecols);
-    return arr;
+    return (PyObject *)arr;
 }
 
diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index a75c3f8aeacd..c92878adc1d4 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -11,7 +11,6 @@
 #include <string.h>
 #include <stdbool.h>
 
-#include "textreading/seq_to_ssize_c_array.h"
 #include "textreading/stream.h"
 #include "textreading/tokenize.h"
 #include "textreading/conversions.h"
@@ -26,6 +25,36 @@
 #define MIN_BLOCK_SIZE (1 << 13)
 
 
+/*
+ * Call the user-defined usecols callable and create the C array of
+ * Py_ssize_t integers.
+ *
+ * Returns the size of the array, and sets *p_usecols_arr to the
+ * C array, allocated with PyMem_Calloc.  The caller is responsible
+ * for freeing this memory with PyMem_Free().
+ *
+ * Returns -1 with an exception set and *p_usecols_arr set to NULL
+ * if the call or conversion fails.
+ */
+static Py_ssize_t
+get_usecols_arr_from_callable(PyObject *usecols_obj, Py_ssize_t n,
+                              Py_ssize_t **p_usecols_arr)
+{
+    *p_usecols_arr = NULL;
+    PyObject *seq = PyObject_CallFunction(usecols_obj, "n", n);
+    if (seq == NULL) {
+        return -1;
+    }
+    Py_ssize_t num_usecols = PyArray_SeqToSsizeCArray(seq, p_usecols_arr,
+                    "the user-provided callable usecols must return a "
+                    "sequence of ints, but it returned an instance of "
+                    "type '%s'",
+                    "the user-provided callable usecols must return a "
+                    "sequence of ints, but it returned a sequence "
+                    "containing at least one occurrence of type '%s'");
+    Py_DECREF(seq);
+    return num_usecols;
+}
 
 /*
  *  Create the array of converter functions from the Python converters.
@@ -118,6 +147,17 @@ create_conv_funcs(
     return NULL;
 }
 
+static void
+free_conv_funcs(Py_ssize_t num_fields, Py_ssize_t *conv_funcs)
+{
+    if (conv_funcs != NULL) {
+        for (Py_ssize_t i = 0; i < num_fields; i++) {
+            Py_XDECREF(conv_funcs[i]);
+        }
+        PyMem_FREE(conv_funcs);
+    }
+}
+
 /**
  * Read a file into the provided array, or create (and possibly grow) an
  * array to read into.
@@ -126,51 +166,88 @@ create_conv_funcs(
  *        the tokenizer.
  * @param max_rows The number of rows to read, or -1.  If negative
  *        all rows are read.
- * @param num_field_types The number of field types stored in `field_types`.
- * @param field_types Information about the dtype for each column (or one if
- *        `homogeneous`).
  * @param pconfig Pointer to the parser config object used by both the
  *        tokenizer and the conversion functions.
- * @param num_usecols The number of columns in `usecols`.
- * @param usecols An array of length `num_usecols` or NULL.  If given indicates
- *        which column is read for each individual row (negative columns are
- *        accepted).
- * @param usecols_obj Either `Py_None` or a callable Python object.  If
- *        callable, the function must accept a single integer argument (the
- *        number of columns in the file), and return a sequence of integers
- *        (the sequence of column indices to use).  When `usecols_obj` is not
- *        `Py_None`, `usecols` MUST be NULL.
+ * @param usecols_obj The `usecols` object provided by the `loadtxt()` caller.
  * @param skiplines The number of lines to skip, these lines are ignored.
  * @param converters Python dictionary of converters.  Finalizing converters
  *        is difficult without information about the number of columns.
  * @param data_array An array to be filled or NULL.  In either case a new
  *        reference is returned (the reference to `data_array` is not stolen).
- * @param out_descr The dtype used for allocating a new array.  This is not
+ * @param dtype The dtype used for allocating a new array.  This is not
  *        used if `data_array` is provided.  Note that the actual dtype of the
  *        returned array can differ for strings.
- * @param num_cols Pointer in which the actual (discovered) number of columns
- *        is returned.  This is only relevant if `homogeneous` is true.
- * @param homogeneous Whether the datatype of the array is not homogeneous,
- *        i.e. not structured.  In this case the number of columns has to be
- *        discovered an the returned array will be 2-dimensional rather than
- *        1-dimensional.
  *
  * @returns Returns the result as an array object or NULL on error.  The result
  *          is always a new reference (even when `data_array` was passed in).
  */
 NPY_NO_EXPORT PyArrayObject *
 read_rows(stream *s,
-        npy_intp max_rows, Py_ssize_t num_field_types, field_type *field_types,
-        parser_config *pconfig,
-        Py_ssize_t num_usecols, Py_ssize_t *usecols, PyObject *usecols_obj,
+        npy_intp max_rows, parser_config *pconfig, PyObject *usecols_obj,
         Py_ssize_t skiplines, PyObject *converters,
-        PyArrayObject *data_array, PyArray_Descr *out_descr,
-        bool homogeneous)
+        PyArrayObject *data_array, PyObject *dtype)
 {
     char *data_ptr = NULL;
     Py_ssize_t current_num_fields;
-    npy_intp row_size = out_descr->elsize;
+    Py_ssize_t prev_num_fields;
     PyObject **conv_funcs = NULL;
+    npy_intp row_size;
+    Py_ssize_t num_usecols = -1;
+    Py_ssize_t *usecols_arr = NULL;
+    PyArray_Descr *out_descr = NULL;
+    field_type *field_types = NULL;
+
+    int ts_result = 0;
+    tokenizer_state ts;
+    if (tokenizer_init(&ts, pconfig) < 0) {
+        goto error;
+    }
+
+    bool usecols_iscallable = PyCallable_Check(usecols_obj);
+
+    /*
+     * Parse usecols_obj, if it is not None or callable.
+     *
+     * Note: we don't have to handle the case of usecols being a scalar int,
+     * because such an argument is wrapped in a list in the Python code
+     * before it gets here.
+     */
+    if (!(usecols_obj == Py_None || usecols_iscallable)) {
+        num_usecols = PyArray_SeqToSsizeCArray(usecols_obj, &usecols_arr,
+                "usecols must be an int, a sequence of ints, or a callable, "
+                "but type '%s' was given",
+                "usecols must contain only ints, but at least one occurrence "
+                "of type '%s' was found");
+        if (num_usecols == -1) {
+            goto error;
+        }
+    }
+
+    /*
+     * Parse dtype.
+     *
+     * If dtypes[0] is dtype the input was not structured and the result
+     * is considered "homogeneous" and we have to discover the number of
+     * columns.
+     */
+    out_descr = (PyArray_Descr *)dtype;
+    /* Hold on to a reference while processing the file. */
+    Py_INCREF(out_descr);
+
+    Py_ssize_t num_field_types = field_types_create(out_descr, &field_types);
+    if (num_field_types < 0) {
+        goto error;
+    }
+    bool homogeneous = num_field_types == 1 && field_types[0].descr == out_descr;
+
+    if (!homogeneous && usecols_arr != NULL && num_usecols != num_field_types) {
+        PyErr_Format(PyExc_TypeError,
+                "If a structured dtype is used, the number of columns in "
+                "`usecols` must match the effective number of fields. "
+                "But %zd usecols were given and the number of fields is %zd.",
+                num_usecols, num_field_types);
+        goto error;
+    }
 
     bool needs_init = PyDataType_FLAGCHK(out_descr, NPY_NEEDS_INIT);
 
@@ -186,19 +263,9 @@ read_rows(stream *s,
     /* We give a warning if max_rows is used and an empty line is encountered */
     bool give_empty_row_warning = max_rows >= 0;
 
-    // If the caller passes in a callable for usecols_obj, then they must
-    // also pass in NULL for usecols.
-    assert(usecols == NULL || usecols_obj == Py_None);
-
-    int ts_result = 0;
-    tokenizer_state ts;
-    if (tokenizer_init(&ts, pconfig) < 0) {
-        goto error;
-    }
-
     /* Set the actual number of fields if it is already known, otherwise -1 */
     Py_ssize_t actual_num_fields = -1;
-    if (usecols != NULL) {
+    if (usecols_arr != NULL) {
         assert(homogeneous || num_field_types == num_usecols);
         actual_num_fields = num_usecols;
     }
@@ -258,52 +325,22 @@ read_rows(stream *s,
             // because we've now read the first line, and we definitively
             // know how many fields (i.e. columns) we will be processing.
 
-            if (usecols_obj != Py_None) {
-                // Call the Python function provided by the caller to
-                // create the usecols array.
-                PyObject *seq = PyObject_CallFunction(usecols_obj, "n",
-                                                      current_num_fields);
-                if (seq == NULL) {
-                    // User-provided function failed.
-                    goto error;
-                }
-                // The user-defined usecols function must return a sequence
-                // of integers.
-                num_usecols = PySequence_Length(seq);
-                if (num_usecols == -1) {
-                    // User-provided function did not return a sequence.
-                    PyErr_Clear();
-                    PyErr_Format(PyExc_TypeError,
-                        "the user-provided callable usecols must return a "
-                        "sequence of ints, but it returned an instance of "
-                        "type '%s'", Py_TYPE(seq)->tp_name);
-                    Py_DECREF(seq);
+            prev_num_fields = current_num_fields;
+
+            if (usecols_iscallable) {
+                num_usecols = get_usecols_arr_from_callable(usecols_obj,
+                                    current_num_fields, &usecols_arr);
+                if (num_usecols < 0) {
                     goto error;
                 }
-
                 if (!homogeneous && num_field_types != num_usecols) {
-                    // A structured dtype was provided, and the length of
-                    // the sequence returned by the user-provided function
-                    // does not have the same length as the number of fields
-                    // in the dtype.
-                    Py_DECREF(seq);
                     PyErr_Format(PyExc_RuntimeError,
                         "length of the sequence returned by the callable "
-                        "usecols (%d) does not equal the number of fields "
-                        "in the given dtype (%d)",
+                        "usecols (%zd) does not equal the number of fields "
+                        "in the given dtype (%zd)",
                         num_usecols, num_field_types);
                     goto error;
                 }
-
-                // Convert the sequence to a C array of Py_ssize_t ints.
-                usecols = seq_to_ssize_c_array(num_usecols, seq,
-                        "the user-provided callable usecols must return a "
-                        "sequence of ints, but it returned a sequence "
-                        "containing at least one occurrence of type '%s'");
-                Py_DECREF(seq);
-                if (usecols == NULL) {
-                    goto error;
-                }
                 actual_num_fields = num_usecols;
             }
 
@@ -313,7 +350,7 @@ read_rows(stream *s,
 
             if (converters != Py_None) {
                 conv_funcs = create_conv_funcs(
-                        converters, actual_num_fields, usecols);
+                        converters, actual_num_fields, usecols_arr);
                 if (conv_funcs == NULL) {
                     goto error;
                 }
@@ -321,6 +358,11 @@ read_rows(stream *s,
 
             /* Note that result_shape[1] is only used if homogeneous is true */
             result_shape[1] = actual_num_fields;
+            /*
+             * We can now set row_size, the number of bytes in each "row" of
+             * the output array.
+             */
+            row_size = out_descr->elsize;
             if (homogeneous) {
                 row_size *= actual_num_fields;
             }
@@ -378,12 +420,66 @@ read_rows(stream *s,
             data_ptr = PyArray_BYTES(data_array);
         }
 
-        if (!usecols && (actual_num_fields != current_num_fields)) {
-            PyErr_Format(PyExc_ValueError,
-                    "the number of columns changed from %zd to %zd at row %zd; "
-                    "use `usecols` to select a subset and avoid this error",
-                    actual_num_fields, current_num_fields, row_count+1);
-            goto error;
+        if (!usecols_arr) {
+            if (actual_num_fields != current_num_fields) {
+                PyErr_Format(PyExc_ValueError,
+                        "the number of columns changed from %zd to %zd at row "
+                        "%zd; use `usecols` to select a subset and avoid this "
+                        "error",
+                        actual_num_fields, current_num_fields, row_count+1);
+                goto error;
+            }
+        }
+        else {
+            if (usecols_iscallable && current_num_fields != prev_num_fields) {
+                /*
+                 * The number of fields in this row is not the same as in the
+                 * previous row. Call the user-defined function to update
+                 * usecols_arr, and possibly the array of converters.
+                 */
+                Py_ssize_t new_num_usecols;
+                Py_ssize_t *new_usecols_arr = NULL;
+                new_num_usecols = get_usecols_arr_from_callable(usecols_obj,
+                                    current_num_fields, &new_usecols_arr);
+                if (new_num_usecols < 0) {
+                    goto error;
+                }
+                if (new_num_usecols != num_usecols) {
+                    PyErr_Format(PyExc_RuntimeError,
+                            "the length of the sequence returned by the "
+                            "user-defined usecols function (%zd) is not the "
+                            "same as in the previous call (%zd)",
+                            new_num_usecols, num_usecols);
+                    PyMem_Free(new_usecols_arr);
+                    goto error;
+                }
+                bool usecols_changed = false;
+                for (Py_ssize_t i = 0; i < num_usecols; ++i) {
+                    if (new_usecols_arr[i] != usecols_arr[i]) {
+                        usecols_changed = true;
+                        break;
+                    }
+                }
+                if (!usecols_changed) {
+                    PyMem_Free(new_usecols_arr);
+                }
+                else {
+                    PyMem_Free(usecols_arr);
+                    usecols_arr = new_usecols_arr;
+                    if (conv_funcs != NULL) {
+                        /*
+                         * Changing usecols can change the conv_funcs array, so
+                         * free the old one and create a new one.
+                         */
+                        free_conv_funcs(actual_num_fields, conv_funcs);
+                        conv_funcs = create_conv_funcs(
+                                converters, actual_num_fields, usecols_arr);
+                        if (conv_funcs == NULL) {
+                            goto error;
+                        }
+                    }
+                }
+            }
         }
 
         if (NPY_UNLIKELY(data_allocated_rows == row_count)) {
@@ -432,11 +528,11 @@ read_rows(stream *s,
                 item_ptr = data_ptr + field_types[f].structured_offset;
             }
 
-            if (usecols == NULL) {
+            if (usecols_arr == NULL) {
                 col = i;
             }
             else {
-                col = usecols[i];
+                col = usecols_arr[i];
                 if (col < 0) {
                     // Python-like column indexing: k = -1 means the last column.
                     col += current_num_fields;
@@ -445,7 +541,7 @@ read_rows(stream *s,
                     PyErr_Format(PyExc_ValueError,
                             "invalid column index %zd at row %zd with %zd "
                             "columns",
-                            usecols[i], row_count+1, current_num_fields);
+                            usecols_arr[i], row_count+1, current_num_fields);
                     goto error;
                 }
             }
@@ -488,23 +584,16 @@ read_rows(stream *s,
             }
         }
 
+        prev_num_fields = current_num_fields;
         ++row_count;
         data_ptr += row_size;
     }
 
-    if (usecols_obj != Py_None) {
-        // This function owns usecols if a callable usecols_obj was given.
-        PyMem_Free(usecols);
-        usecols = NULL;  // An overabundance of caution...
-    }
+    PyMem_Free(usecols_arr);
 
     tokenizer_clear(&ts);
-    if (conv_funcs != NULL) {
-        for (Py_ssize_t i = 0; i < actual_num_fields; i++) {
-            Py_XDECREF(conv_funcs[i]);
-        }
-        PyMem_FREE(conv_funcs);
-    }
+    free_conv_funcs(actual_num_fields, conv_funcs);
+    field_types_xclear(num_field_types, field_types);
 
     if (data_array == NULL) {
         assert(row_count == 0 && result_shape[0] == 0);
@@ -543,21 +632,15 @@ read_rows(stream *s,
         ((PyArrayObject_fields *)data_array)->dimensions[0] = row_count;
     }
 
+    Py_DECREF(out_descr);
+
     return data_array;
 
   error:
-    if (usecols_obj != Py_None) {
-        // If the error occurred early enough in the function, we might
-        // not have allocated usecols yet, but that's OK, because we know
-        // that usecols is NULL in that case.
-        PyMem_Free(usecols);
-    }
-    if (conv_funcs != NULL) {
-        for (Py_ssize_t i = 0; i < actual_num_fields; i++) {
-            Py_XDECREF(conv_funcs[i]);
-        }
-        PyMem_FREE(conv_funcs);
-    }
+    PyMem_Free(usecols_arr);
+    free_conv_funcs(actual_num_fields, conv_funcs);
+    Py_XDECREF(out_descr);
+    field_types_xclear(num_field_types, field_types);
     tokenizer_clear(&ts);
     Py_XDECREF(data_array);
     return NULL;
diff --git a/numpy/core/src/multiarray/textreading/rows.h b/numpy/core/src/multiarray/textreading/rows.h
index 1a707a2d3918..68c894ffb903 100644
--- a/numpy/core/src/multiarray/textreading/rows.h
+++ b/numpy/core/src/multiarray/textreading/rows.h
@@ -12,12 +12,8 @@
 
 
 NPY_NO_EXPORT PyArrayObject *
-read_rows(stream *s,
-        npy_intp nrows, Py_ssize_t num_field_types, field_type *field_types,
-        parser_config *pconfig,
-        Py_ssize_t num_usecols, Py_ssize_t *usecols, PyObject *usecols_obj,
-        Py_ssize_t skiplines, PyObject *converters,
-        PyArrayObject *data_array, PyArray_Descr *out_descr,
-        bool homogeneous);
+read_rows(stream *s, npy_intp nrows, parser_config *pconfig,
+          PyObject *usecols_obj, Py_ssize_t skiplines, PyObject *converters,
+          PyArrayObject *data_array, PyObject *dtype);
 
 #endif  /* NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_ROWS_H_ */
diff --git a/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.c b/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.c
deleted file mode 100644
index 67a2effef45a..000000000000
--- a/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.c
+++ /dev/null
@@ -1,52 +0,0 @@
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-
-#define NPY_NO_DEPRECATED_API NPY_API_VERSION
-#define _MULTIARRAYMODULE
-#include "numpy/arrayobject.h"  // For NPY_NO_EXPORT
-
-//
-// Convert a Python sequence to a C array of Py_ssize_t integers.
-//
-// `seq` must be a Python sequence of length `len`.  It is assumed that the
-// caller has already checked the length of the sequence, so the length is an
-// argument instead of being inferred from `seq` itself.
-//
-// `errtext` must be provided, and it must contain one occurrence of the
-// format code sequence '%s'.  This text is used as the text of the TypeError
-// that is raised when an element of `seq` is found that cannot be converted
-// to an a Py_ssize_t integer.  The '%s' format code will be replaced with
-// the type of the object that failed the conversion.
-//
-// Returns NULL with an exception set if the conversion fails.
-//
-// The memory for the array is allocated with PyMem_Calloc.
-// The caller must free the memory with PyMem_FREE or PyMem_Free.
-//
-NPY_NO_EXPORT Py_ssize_t *
-seq_to_ssize_c_array(Py_ssize_t len, PyObject *seq, char *errtext)
-{
-    Py_ssize_t *arr = PyMem_Calloc(len, sizeof(Py_ssize_t));
-    if (arr == NULL) {
-        PyErr_NoMemory();
-        return NULL;
-    }
-    for (Py_ssize_t i = 0; i < len; ++i) {
-        PyObject *tmp = PySequence_GetItem(seq, i);
-        if (tmp == NULL) {
-            PyMem_Free(arr);
-            return NULL;
-        }
-        arr[i] = PyNumber_AsSsize_t(tmp, PyExc_OverflowError);
-        if (arr[i] == -1 && PyErr_Occurred()) {
-            if (PyErr_ExceptionMatches(PyExc_TypeError)) {
-                PyErr_Format(PyExc_TypeError, errtext, Py_TYPE(tmp)->tp_name);
-            }
-            Py_DECREF(tmp);
-            PyMem_Free(arr);
-            return NULL;
-        }
-        Py_DECREF(tmp);
-    }
-    return arr;
-}
diff --git a/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.h b/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.h
deleted file mode 100644
index a58d7a01a93c..000000000000
--- a/numpy/core/src/multiarray/textreading/seq_to_ssize_c_array.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef SEQ_TO_SSIZE_C_ARRAY_H
-#define SEQ_TO_SSIZE_C_ARRAY_H
-
-Py_ssize_t *seq_to_ssize_c_array(Py_ssize_t len, PyObject *seq, char *initialtext);
-
-#endif
diff --git a/numpy/core/src/multiarray/textreading/tokenize.cpp b/numpy/core/src/multiarray/textreading/tokenize.cpp
index d0d9cf844912..200b7f438c0d 100644
--- a/numpy/core/src/multiarray/textreading/tokenize.cpp
+++ b/numpy/core/src/multiarray/textreading/tokenize.cpp
@@ -446,6 +446,8 @@ tokenizer_init(tokenizer_state *ts, parser_config *config)
 
     ts->fields = (field_info *)PyMem_Malloc(4 * sizeof(*ts->fields));
     if (ts->fields == nullptr) {
+        PyMem_Free(ts->field_buffer);
+        ts->field_buffer = nullptr;
         PyErr_NoMemory();
         return -1;
     }
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 38a751d11511..c99c37bc0171 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -874,13 +874,13 @@ def __index__(self):
         bogus_idx = 1.5
         assert_raises_regex(
             TypeError,
-            '^usecols must be.*%s' % type(bogus_idx).__name__,
+            '^usecols must contain.*%s' % type(bogus_idx).__name__,
             np.loadtxt, c, usecols=bogus_idx
             )
 
         assert_raises_regex(
             TypeError,
-            '^usecols must be.*%s' % type(bogus_idx).__name__,
+            '^usecols must contain.*%s' % type(bogus_idx).__name__,
             np.loadtxt, c, usecols=[0, bogus_idx, 0]
             )
 
diff --git a/numpy/lib/tests/test_loadtxt.py b/numpy/lib/tests/test_loadtxt.py
index 042f8a44ef6c..dbb66385740a 100644
--- a/numpy/lib/tests/test_loadtxt.py
+++ b/numpy/lib/tests/test_loadtxt.py
@@ -269,6 +269,12 @@ def test_empty_usecols(usecols):
     assert res.dtype == np.dtype([])
 
 
+def test_usecols_dtype_num_fields_mismatch():
+    dt = np.dtype([('x', float), ('y', float)])
+    with pytest.raises(TypeError, match='the number of fields is 2'):
+        np.loadtxt(StringIO('1 2 3 4\n5 6 7 8'), dtype=dt, usecols=[1, 2, 3])
+
+
 def test_callable_usecols_basic():
     txt = StringIO('1 2 3 4 5\n6 7 8 9 1\n2 3 4 5 6\n')
     res = np.loadtxt(txt, dtype=int, usecols=lambda n: list(range(n)[::2]))
@@ -292,11 +298,43 @@ def load(self):
     assert_equal(res, expected)
 
 
+def test_callable_usecols_ragged_input_columns():
+    txt = StringIO('1 2 3\n2 4 6 8 10\n99\n21 22 23')
+    res = np.loadtxt(txt, dtype=int, usecols=lambda n: [0, n - 1, n//2])
+    expected = np.array([[1, 3, 2], [2, 10, 6], [99, 99, 99], [21, 23, 22]])
+    assert_equal(res, expected)
+
+
+# The second 'text' instance has varying numbers of fields in the rows.
+@pytest.mark.parametrize('text', ['1 2 3 4 5\n6 7 8 9 1\n2 3 5 7 9',
+                                  '1 3 5\n6 7 8 9 1\n2 3 7 9'])
+@pytest.mark.parametrize('usecols, expected',
+        [(lambda n: [0, n-1], [[10, 5], [60, 1], [20, 9]]),
+         (lambda n: [0, -1], [[10, -5], [60, -1], [20, -9]])])
+def test_callable_usecols_with_converters(text, usecols, expected):
+    # converters contains the keys 0 and -1.  When usecols returns [0, n-1],
+    # the last column is *not* passed through a converter, because the code
+    # that processes converters treats the key -1 and the positive key n-1
+    # as distinct, and only uses the key that exactly matches what is
+    # given in usecols.
+    converters = {0: lambda s: 10*float(s),
+                  -1: lambda s: -float(s)}
+    res = np.loadtxt(StringIO(text), dtype=int, usecols=usecols,
+                     converters=converters)
+    assert_equal(res, expected)
+
+
 def test_callable_usecols_raises_exception():
     with pytest.raises(ZeroDivisionError, match='division by zero'):
         np.loadtxt(StringIO('1 2\n4 5'), usecols=lambda n: 1/0)
 
 
+def test_callable_usecols_second_call_raises_exception():
+    with pytest.raises(ZeroDivisionError, match='division by zero'):
+        np.loadtxt(StringIO('1 2\n3 4 5'),
+                   usecols=lambda n: [0, 1] if n == 2 else 1/0)
+
+
 def test_callable_usecols_not_a_sequence():
     with pytest.raises(TypeError,
                        match="returned an instance of type 'module'"):
@@ -317,6 +355,12 @@ def test_callable_usecols_bad_len_with_structured_dtype():
         np.loadtxt(txt, usecols=lambda n: [0, 2, 3], dtype=dt)
 
 
+def test_callable_usecols_returned_num_fields_changed():
+    txt = StringIO('1 2 3\n6 7 8 9\n2 3 4 5\n')
+    with pytest.raises(RuntimeError, match="not the same as in the previous"):
+        np.loadtxt(txt, dtype=int, usecols=lambda n: range(n - 1))
+
+
 @pytest.mark.parametrize("c1", ["a", "の", "🫕"])
 @pytest.mark.parametrize("c2", ["a", "の", "🫕"])
 def test_large_unicode_characters(c1, c2):

From 571a5e6334d4465a719f80a6ef8e4cda1c30b931 Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Tue, 28 Jun 2022 14:26:26 -0400
Subject: [PATCH 04/14] BUG: Fix free_conv_funcs() declaration and a few
 compiler warnings.

---
 numpy/core/src/multiarray/textreading/rows.c | 22 +++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index c92878adc1d4..49b4d47cb6ef 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -11,6 +11,7 @@
 #include <string.h>
 #include <stdbool.h>
 
+#include "conversion_utils.h"
 #include "textreading/stream.h"
 #include "textreading/tokenize.h"
 #include "textreading/conversions.h"
@@ -148,7 +149,7 @@ create_conv_funcs(
 }
 
 static void
-free_conv_funcs(Py_ssize_t num_fields, Py_ssize_t *conv_funcs)
+free_conv_funcs(Py_ssize_t num_fields, PyObject **conv_funcs)
 {
     if (conv_funcs != NULL) {
         for (Py_ssize_t i = 0; i < num_fields; i++) {
@@ -195,6 +196,7 @@ read_rows(stream *s,
     Py_ssize_t num_usecols = -1;
     Py_ssize_t *usecols_arr = NULL;
     PyArray_Descr *out_descr = NULL;
+    Py_ssize_t num_field_types = 0;
     field_type *field_types = NULL;
 
     int ts_result = 0;
@@ -234,7 +236,7 @@ read_rows(stream *s,
     /* Hold on to a reference while processing the file. */
     Py_INCREF(out_descr);
 
-    Py_ssize_t num_field_types = field_types_create(out_descr, &field_types);
+    num_field_types = field_types_create(out_descr, &field_types);
     if (num_field_types < 0) {
         goto error;
     }
@@ -251,6 +253,15 @@ read_rows(stream *s,
 
     bool needs_init = PyDataType_FLAGCHK(out_descr, NPY_NEEDS_INIT);
 
+    /*
+     * Initial guess of the number of bytes in each "row" of the output array.
+     * This is correct if the user provided a structure data type.  For a
+     * simple data type (i.e the "homogeneous" case), this will have to be
+     * multiplied by the number of fields in each row (i.e. the number of
+     * columns in the output array).
+     */
+    row_size = out_descr->elsize;
+
     int ndim = homogeneous ? 2 : 1;
     npy_intp result_shape[2] = {0, 1};
 
@@ -358,12 +369,9 @@ read_rows(stream *s,
 
             /* Note that result_shape[1] is only used if homogeneous is true */
             result_shape[1] = actual_num_fields;
-            /*
-             * We can now set row_size, the number of bytes in each "row" of
-             * the output array.
-             */
-            row_size = out_descr->elsize;
+
             if (homogeneous) {
+                /* We're create a 2-d output array, so scale up row_size. */
                 row_size *= actual_num_fields;
             }
 

From 6d69407125ada3b95a7c11f7e884c2d6ee061cf9 Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Tue, 28 Jun 2022 15:05:00 -0400
Subject: [PATCH 05/14] BUG: remove unnecessary (and incorrect) clause from an
 assert()

---
 numpy/core/src/multiarray/textreading/rows.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index 49b4d47cb6ef..e7a27d99817e 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -281,7 +281,7 @@ read_rows(stream *s,
         actual_num_fields = num_usecols;
     }
     else if (!homogeneous) {
-        assert(usecols == NULL || num_field_types == num_usecols);
+        assert(num_field_types == num_usecols);
         actual_num_fields = num_field_types;
     }
 

From 3e30cd825754bcb018575b51eb9bf89f9b48dc3c Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Tue, 28 Jun 2022 15:08:17 -0400
Subject: [PATCH 06/14] Initialize a var to avoid a compiler warning.

---
 numpy/core/src/multiarray/textreading/rows.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index e7a27d99817e..75903d4858b2 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -190,7 +190,7 @@ read_rows(stream *s,
 {
     char *data_ptr = NULL;
     Py_ssize_t current_num_fields;
-    Py_ssize_t prev_num_fields;
+    Py_ssize_t prev_num_fields = 0;  /* Set value to avoid compiler warning */
     PyObject **conv_funcs = NULL;
     npy_intp row_size;
     Py_ssize_t num_usecols = -1;

From 231321ef1a38326188296b14cc7aae63b0908e65 Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Tue, 28 Jun 2022 17:24:05 -0400
Subject: [PATCH 07/14] Ensure PY_SSIZE_T_CLEAN is set before including
 Python.h

---
 numpy/core/src/multiarray/textreading/conversions.c | 1 +
 numpy/core/src/multiarray/textreading/str_to_int.c  | 1 +
 numpy/core/src/multiarray/textreading/tokenize.cpp  | 2 +-
 numpy/core/src/multiarray/textreading/tokenize.h    | 1 +
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/numpy/core/src/multiarray/textreading/conversions.c b/numpy/core/src/multiarray/textreading/conversions.c
index 11f4210f7283..772b5f9c0022 100644
--- a/numpy/core/src/multiarray/textreading/conversions.c
+++ b/numpy/core/src/multiarray/textreading/conversions.c
@@ -1,4 +1,5 @@
 
+#define PY_SSIZE_T_CLEAN
 #include <Python.h>
 
 #include <string.h>
diff --git a/numpy/core/src/multiarray/textreading/str_to_int.c b/numpy/core/src/multiarray/textreading/str_to_int.c
index 0dd6c0b0e66a..0fab15eeabc4 100644
--- a/numpy/core/src/multiarray/textreading/str_to_int.c
+++ b/numpy/core/src/multiarray/textreading/str_to_int.c
@@ -1,4 +1,5 @@
 
+#define PY_SSIZE_T_CLEAN
 #include <Python.h>
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
diff --git a/numpy/core/src/multiarray/textreading/tokenize.cpp b/numpy/core/src/multiarray/textreading/tokenize.cpp
index 200b7f438c0d..bc27e0d4cd34 100644
--- a/numpy/core/src/multiarray/textreading/tokenize.cpp
+++ b/numpy/core/src/multiarray/textreading/tokenize.cpp
@@ -1,4 +1,4 @@
-
+#define PY_SSIZE_T_CLEAN
 #include <Python.h>
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
diff --git a/numpy/core/src/multiarray/textreading/tokenize.h b/numpy/core/src/multiarray/textreading/tokenize.h
index a78c6d936c2a..e374e8460ee4 100644
--- a/numpy/core/src/multiarray/textreading/tokenize.h
+++ b/numpy/core/src/multiarray/textreading/tokenize.h
@@ -2,6 +2,7 @@
 #ifndef NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_TOKENIZE_H_
 #define NUMPY_CORE_SRC_MULTIARRAY_TEXTREADING_TOKENIZE_H_
 
+#define PY_SSIZE_T_CLEAN
 #include <Python.h>
 #include "numpy/ndarraytypes.h"
 

From d48333c437c2d3ebf7f7964c29cc121ab351899c Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Tue, 28 Jun 2022 20:24:08 -0400
Subject: [PATCH 08/14] TMP: Check sizes of integers

---
 numpy/core/src/multiarray/textreading/readtext.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/numpy/core/src/multiarray/textreading/readtext.c b/numpy/core/src/multiarray/textreading/readtext.c
index 0d61bac1324d..bf48476f4eaf 100644
--- a/numpy/core/src/multiarray/textreading/readtext.c
+++ b/numpy/core/src/multiarray/textreading/readtext.c
@@ -159,6 +159,13 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
         return NULL;
     }
 
+    if (sizeof(npy_intp) != sizeof(Py_ssize_t)) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "sizeof(npy_intp) = %zu, sizeof(Py_ssize_t) = %zu\n",
+                     sizeof(npy_intp), sizeof(Py_ssize_t));
+        return NULL;
+    }
+
     /* Reject matching control characters, they just rarely make sense anyway */
     if (error_if_matching_control_characters(
             pc.delimiter, pc.quote, pc.comment) < 0) {

From b28370900807bf14764cee0abb1e3071fb8c8df0 Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Tue, 28 Jun 2022 20:29:47 -0400
Subject: [PATCH 09/14] TMP: Move code that checks sizes of integers

---
 numpy/core/src/multiarray/textreading/readtext.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/numpy/core/src/multiarray/textreading/readtext.c b/numpy/core/src/multiarray/textreading/readtext.c
index bf48476f4eaf..eebeba65e325 100644
--- a/numpy/core/src/multiarray/textreading/readtext.c
+++ b/numpy/core/src/multiarray/textreading/readtext.c
@@ -139,6 +139,13 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
     };
     bool filelike = true;
 
+    if (sizeof(npy_intp) != sizeof(Py_ssize_t)) {
+        PyErr_Format(PyExc_RuntimeError,
+                     "sizeof(npy_intp) = %zu, sizeof(Py_ssize_t) = %zu\n",
+                     sizeof(npy_intp), sizeof(Py_ssize_t));
+        return NULL;
+    }
+
     NPY_PREPARE_ARGPARSER;
     if (npy_parse_arguments("_load_from_filelike", args, len_args, kwnames,
             "file", NULL, &file,
@@ -159,13 +166,6 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
         return NULL;
     }
 
-    if (sizeof(npy_intp) != sizeof(Py_ssize_t)) {
-        PyErr_Format(PyExc_RuntimeError,
-                     "sizeof(npy_intp) = %zu, sizeof(Py_ssize_t) = %zu\n",
-                     sizeof(npy_intp), sizeof(Py_ssize_t));
-        return NULL;
-    }
-
     /* Reject matching control characters, they just rarely make sense anyway */
     if (error_if_matching_control_characters(
             pc.delimiter, pc.quote, pc.comment) < 0) {

From 700dcb74db172fd70aee48d56356eb92b919852c Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Tue, 28 Jun 2022 21:54:10 -0400
Subject: [PATCH 10/14] Revert previous TMP commit.

---
 numpy/core/src/multiarray/textreading/readtext.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/numpy/core/src/multiarray/textreading/readtext.c b/numpy/core/src/multiarray/textreading/readtext.c
index eebeba65e325..0d61bac1324d 100644
--- a/numpy/core/src/multiarray/textreading/readtext.c
+++ b/numpy/core/src/multiarray/textreading/readtext.c
@@ -139,13 +139,6 @@ _load_from_filelike(PyObject *NPY_UNUSED(mod),
     };
     bool filelike = true;
 
-    if (sizeof(npy_intp) != sizeof(Py_ssize_t)) {
-        PyErr_Format(PyExc_RuntimeError,
-                     "sizeof(npy_intp) = %zu, sizeof(Py_ssize_t) = %zu\n",
-                     sizeof(npy_intp), sizeof(Py_ssize_t));
-        return NULL;
-    }
-
     NPY_PREPARE_ARGPARSER;
     if (npy_parse_arguments("_load_from_filelike", args, len_args, kwnames,
             "file", NULL, &file,

From 9c7bf60a0796c7f3ddd20a07eb3cd8d7c6877a1d Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Tue, 28 Jun 2022 21:56:09 -0400
Subject: [PATCH 11/14] Remove invalid assert()

---
 numpy/core/src/multiarray/textreading/rows.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index 75903d4858b2..b5c49396af24 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -281,7 +281,6 @@ read_rows(stream *s,
         actual_num_fields = num_usecols;
     }
     else if (!homogeneous) {
-        assert(num_field_types == num_usecols);
         actual_num_fields = num_field_types;
     }
 

From 69ab6c3b70adab24df44a9a4edfa4928868af1af Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Fri, 1 Jul 2022 01:31:07 -0400
Subject: [PATCH 12/14] get_usecols_arr_from_callable now handles the initial
 and subsequent calls.

---
 numpy/core/src/multiarray/textreading/rows.c | 162 +++++++++++--------
 1 file changed, 95 insertions(+), 67 deletions(-)

diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index b5c49396af24..14bb70b8c2aa 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -25,36 +25,89 @@
  */
 #define MIN_BLOCK_SIZE (1 << 13)
 
-
+#define USECOLS_ERROR    -1
+#define USECOLS_NOCHANGE  0
+#define USECOLS_CHANGED   1
 /*
- * Call the user-defined usecols callable and create the C array of
- * Py_ssize_t integers.
+ * Call the user-defined usecols callable and create or update the
+ * C array of Py_ssize_t integers.
+ *
+ * On input, *p_usecols_arr must be either NULL or the result of
+ * a previous call to this function.  If *p_usecols is not NULL,
+ * *p_num_usecols must be the length of the array pointed to by
+ * *p_usecols_arr.
  *
- * Returns the size of the array, and sets *p_usecols_arr to the
- * C array, allocated with PyMem_Calloc.  The caller is responsible
- * for freeing this memory with PyMem_Free().
+ * Return values:
+ *   USECOLS_ERROR (-1)
+ *       Error.  An exception has been set. The values pointed to by
+ *       p_num_usecols and p_usecols_arr have not been changed.
+ *   USECOLS_NOCHANGE (0)
+ *       The usecols data has not changed.  This value can be returned
+ *       only if p_usecols_arr was not NULL.
+ *   USECOLS_CHANGED (1)
+ *       The usecols data has changed.
  *
- * Returns -1 with an exception set and *p_usecols_arr set to NULL
- * if the call or conversion fails.
+ * If no error occurs and *p_usecols_arr was NULL on input, then
+ * on return, *p_usecol_arr points to an array with length
+ * *p_num_usecols.  If *p_usecols_arr was not NULL, then on
+ * output *p_num_usecols is not change, and whether or not
+ * *p_usecol_arr has changed is indicated by the return value.
  */
-static Py_ssize_t
+static int
 get_usecols_arr_from_callable(PyObject *usecols_obj, Py_ssize_t n,
+                              Py_ssize_t *p_num_usecols,
                               Py_ssize_t **p_usecols_arr)
 {
-    *p_usecols_arr = NULL;
+    Py_ssize_t new_num_usecols;
+    Py_ssize_t *new_usecols_arr = NULL;
     PyObject *seq = PyObject_CallFunction(usecols_obj, "n", n);
     if (seq == NULL) {
-        return -1;
+        return USECOLS_ERROR;
     }
-    Py_ssize_t num_usecols = PyArray_SeqToSsizeCArray(seq, p_usecols_arr,
-                    "the user-provided callable usecols must return a "
-                    "sequence of ints, but it returned an instance of "
-                    "type '%s'",
-                    "the user-provided callable usecols must return a "
-                    "sequence of ints, but it returned a sequence "
-                    "containing at least one occurrence of type '%s'");
+    new_num_usecols = PyArray_SeqToSsizeCArray(seq, &new_usecols_arr,
+                        "the user-provided callable usecols must return a "
+                        "sequence of ints, but it returned an instance of "
+                        "type '%s'",
+                        "the user-provided callable usecols must return a "
+                        "sequence of ints, but it returned a sequence "
+                        "containing at least one occurrence of type '%s'");
     Py_DECREF(seq);
-    return num_usecols;
+    if (new_num_usecols < 0) {
+        return USECOLS_ERROR;
+    }
+    if (*p_usecols_arr == NULL) {
+        /* This is an initial call. */
+        *p_num_usecols = new_num_usecols;
+        *p_usecols_arr = new_usecols_arr;
+        return USECOLS_CHANGED;
+    }
+    /* This is a update call. */
+    if (new_num_usecols != *p_num_usecols) {
+        /* A new call of usecols returned a different length. */
+        PyErr_Format(PyExc_RuntimeError,
+                     "the length of the sequence returned by the "
+                     "user-defined usecols function (%zd) is not the "
+                     "same as in the previous call (%zd)",
+                     new_num_usecols, *p_num_usecols);
+        PyMem_Free(new_usecols_arr);
+        return USECOLS_ERROR;
+    }
+    /* Check if the usecols data is different. */
+    bool usecols_changed = false;
+    for (Py_ssize_t i = 0; i < new_num_usecols; ++i) {
+        if (new_usecols_arr[i] != *p_usecols_arr[i]) {
+            usecols_changed = true;
+            break;
+        }
+    }
+    if (!usecols_changed) {
+        PyMem_Free(new_usecols_arr);
+        return USECOLS_NOCHANGE;
+    }
+    /* The data is different; update the pointer to the new array. */
+    PyMem_Free(*p_usecols_arr);
+    *p_usecols_arr = new_usecols_arr;
+    return USECOLS_CHANGED;
 }
 
 /*
@@ -338,9 +391,9 @@ read_rows(stream *s,
             prev_num_fields = current_num_fields;
 
             if (usecols_iscallable) {
-                num_usecols = get_usecols_arr_from_callable(usecols_obj,
-                                    current_num_fields, &usecols_arr);
-                if (num_usecols < 0) {
+                int status = get_usecols_arr_from_callable(usecols_obj,
+                               current_num_fields, &num_usecols, &usecols_arr);
+                if (status == USECOLS_ERROR) {
                     goto error;
                 }
                 if (!homogeneous && num_field_types != num_usecols) {
@@ -437,55 +490,30 @@ read_rows(stream *s,
                 goto error;
             }
         }
-        else {
-            if (usecols_iscallable && current_num_fields != prev_num_fields) {
+        else if (NPY_UNLIKELY(current_num_fields != prev_num_fields)
+                 && usecols_iscallable) {
+            /*
+             * The number of fields in this row is not the same as in the
+             * previous row. Call the user-defined function to update
+             * usecols_arr.  Then update the array of converters, if
+             * necessary.
+             */
+            int status = get_usecols_arr_from_callable(usecols_obj,
+                            current_num_fields, &num_usecols, &usecols_arr);
+            if (status == USECOLS_ERROR) {
+                goto error;
+            }
+            if (status == USECOLS_CHANGED && conv_funcs != NULL) {
                 /*
-                 * The number of fields in this row is not the same as in the
-                 * previous row. Call the user-defined function to update
-                 * usecols_arr, and possibly the array of converters.
+                 * Changing usecols can change the conv_funcs array, so
+                 * free the old one and create a new one.
                  */
-                Py_ssize_t new_num_usecols;
-                Py_ssize_t *new_usecols_arr = NULL;
-                new_num_usecols = get_usecols_arr_from_callable(usecols_obj,
-                                    current_num_fields, &new_usecols_arr);
-                if (new_num_usecols < 0) {
-                    goto error;
-                }
-                if (new_num_usecols != num_usecols) {
-                    PyErr_Format(PyExc_RuntimeError,
-                            "the length of the sequence returned by the "
-                            "user-defined usecols function (%zd) is not the "
-                            "same as in the previous call (%zd)",
-                            new_num_usecols, num_usecols);
-                    PyMem_Free(new_usecols_arr);
+                free_conv_funcs(actual_num_fields, conv_funcs);
+                conv_funcs = create_conv_funcs(
+                        converters, actual_num_fields, usecols_arr);
+                if (conv_funcs == NULL) {
                     goto error;
                 }
-                bool usecols_changed = false;
-                for (Py_ssize_t i = 0; i < num_usecols; ++i) {
-                    if (new_usecols_arr[i] != usecols_arr[i]) {
-                        usecols_changed = true;
-                        break;
-                    }
-                }
-                if (!usecols_changed) {
-                    PyMem_Free(new_usecols_arr);
-                }
-                else {
-                    PyMem_Free(usecols_arr);
-                    usecols_arr = new_usecols_arr;
-                    if (conv_funcs != NULL) {
-                        /*
-                         * Changing usecols can change the conv_funcs array, so
-                         * free the old one and create a new one.
-                         */
-                        free_conv_funcs(actual_num_fields, conv_funcs);
-                        conv_funcs = create_conv_funcs(
-                                converters, actual_num_fields, usecols_arr);
-                        if (conv_funcs == NULL) {
-                            goto error;
-                        }
-                    }
-                }
             }
         }
 

From b7e02ddc4ed2106dc693b688bc6b84ba7da073f2 Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Wed, 6 Jul 2022 00:21:57 -0400
Subject: [PATCH 13/14] Expand some code comments.

---
 numpy/core/src/multiarray/conversion_utils.c | 8 ++++++++
 numpy/core/src/multiarray/textreading/rows.c | 7 +++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/numpy/core/src/multiarray/conversion_utils.c b/numpy/core/src/multiarray/conversion_utils.c
index bb4a14b6d7d3..a4914c9f144c 100644
--- a/numpy/core/src/multiarray/conversion_utils.c
+++ b/numpy/core/src/multiarray/conversion_utils.c
@@ -1167,6 +1167,14 @@ PyArray_IntpFromSequence(PyObject *seq, npy_intp *vals, int maxvals)
 // The memory for the array is allocated with PyMem_Calloc.
 // The caller must free the memory with PyMem_FREE or PyMem_Free.
 //
+// If the length of the sequence is 0, a non-NULL pointer is returned
+// in *parr, but it must not be dereferenced.  To quote the PyMem_Calloc
+// documentation:
+//
+//    Requesting zero elements or elements of size zero bytes returns a
+//    distinct non-NULL pointer if possible, as if PyMem_Calloc(1, 1) had
+//    been called instead.
+//
 // Returns -1 with an exception set and with *parr set to NULL if the
 // conversion fails.
 //
diff --git a/numpy/core/src/multiarray/textreading/rows.c b/numpy/core/src/multiarray/textreading/rows.c
index 14bb70b8c2aa..3cf41381701e 100644
--- a/numpy/core/src/multiarray/textreading/rows.c
+++ b/numpy/core/src/multiarray/textreading/rows.c
@@ -10,6 +10,7 @@
 
 #include <string.h>
 #include <stdbool.h>
+#include <stdio.h>
 
 #include "conversion_utils.h"
 #include "textreading/stream.h"
@@ -32,8 +33,8 @@
  * Call the user-defined usecols callable and create or update the
  * C array of Py_ssize_t integers.
  *
- * On input, *p_usecols_arr must be either NULL or the result of
- * a previous call to this function.  If *p_usecols is not NULL,
+ * On input, *p_usecols_arr must be either NULL or the result of a
+ * previous call to this function.  If *p_usecols_arr is not NULL,
  * *p_num_usecols must be the length of the array pointed to by
  * *p_usecols_arr.
  *
@@ -112,6 +113,8 @@ get_usecols_arr_from_callable(PyObject *usecols_obj, Py_ssize_t n,
 
 /*
  *  Create the array of converter functions from the Python converters.
+ *  Elements of conv_funcs for which a corresponding converter has not
+ *  been given will be NULL.
  */
 static PyObject **
 create_conv_funcs(

From 28c32cadaf480cad872688c829fa06f323033058 Mon Sep 17 00:00:00 2001
From: warren <warren.weckesser@gmail.com>
Date: Wed, 6 Jul 2022 00:22:22 -0400
Subject: [PATCH 14/14] Split a test in test_loadtxt.py

---
 numpy/lib/tests/test_loadtxt.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/numpy/lib/tests/test_loadtxt.py b/numpy/lib/tests/test_loadtxt.py
index dbb66385740a..c51e6fececbd 100644
--- a/numpy/lib/tests/test_loadtxt.py
+++ b/numpy/lib/tests/test_loadtxt.py
@@ -226,7 +226,7 @@ def test_converters_negative_indices():
 
 
 @pytest.mark.parametrize('usecols', [[0, -1], lambda n: [0, -1]])
-def test_converters_negative_indices_with_usecols(usecols):
+def test_converters_neg_indices_usecols(usecols):
     txt = StringIO('1.5,2.5,3.5\n3.0,4.0,XXX\n5.5,6.0,7.5\n')
     conv = {-1: lambda s: np.nan if s == 'XXX' else float(s)}
     expected = np.array([[1.5, 3.5], [3.0, np.nan], [5.5, 7.5]])
@@ -240,8 +240,10 @@ def test_converters_negative_indices_with_usecols(usecols):
     )
     assert_equal(res, expected)
 
-    # Second test with variable number of rows:
-    res = np.loadtxt(StringIO('''0,1,2\n0,1,2,3,4'''), delimiter=",",
+
+@pytest.mark.parametrize('usecols', [[0, -1], lambda n: [0, -1]])
+def test_converters_neg_indices_usecols_varying_fields(usecols):
+    res = np.loadtxt(StringIO('0,1,2\n0,1,2,3,4'), delimiter=",",
                      usecols=usecols, converters={-1: (lambda x: -1)})
     assert_array_equal(res, [[0, -1], [0, -1]])