diff --git a/.appveyor.yml b/.appveyor.yml
deleted file mode 100644
index 0effb3f8..00000000
--- a/.appveyor.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-version: 1.0.{build}
-
-os: Visual Studio 2015
-
-platform: x64
-
-
-matrix:
-  fast_finish: true
-
-# Should speed up repository cloning
-#
-shallow_clone: true
-clone_depth: 5
-
-#init:
-#  - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
-
-#on_finish:
-#  - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
-
-cache:
-    C:\Miniconda3-x64\pkgs -> appveyor.yml
-
-install:
- - call scripts\\appveyor\\config.cmd
-
-build_script:
- - call scripts\\appveyor\\build.cmd
-
-test_script:
- - call scripts\\appveyor\\test.cmd
- -
diff --git a/.github/environment.yml b/.github/environment.yml
new file mode 100644
index 00000000..d18b1d3a
--- /dev/null
+++ b/.github/environment.yml
@@ -0,0 +1,12 @@
+name: testenv  # conda environment loaded by the CI workflows through `environment-file`
+channels:
+  - conda-forge
+dependencies:
+  - scikit-build-core
+  - numpy  # the build workflow re-installs a matrix-pinned numpy after the environment is created
+  - compilers
+  - pybind11  # located by find_package(pybind11) in CMakeLists.txt
+  - libpdal-core  # presumably provides the PDAL library that CMake looks for -- confirm
+  - pytest  # the build workflow runs `python -m pytest`
+  - meshio
+  - geopandas
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 00000000..fc69ec07
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,78 @@
+name: Build
+
+on:
+  pull_request:
+    branches:
+    - '*'
+  push:
+    branches:
+    - '*'
+  release:
+    types:
+      - published
+
+defaults:
+  run:
+    shell: bash -l -eo pipefail {0}  # login shell so the conda environment is active in every step
+
+jobs:
+  build:
+    name: ${{ matrix.os }} py${{ matrix.python-version }} numpy ${{ matrix.numpy-version }}
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: true
+      matrix:
+        os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
+        python-version: ['3.10', '3.11', '3.12', '3.13']
+        numpy-version: ['1.24', '2.1']
+        exclude:  # numpy 1.24 is skipped for Python 3.12 and 3.13
+          - python-version: '3.12'
+            numpy-version: '1.24'
+          - python-version: '3.13'
+            numpy-version: '1.24'
+
+    steps:
+    - name: Check out python-pdal
+      uses: actions/checkout@v4
+
+    - name: Check out python-pdal-plugins
+      uses: actions/checkout@v4
+      with:
+        repository: PDAL/python-plugins
+        path: ./plugins  # checked out beside the main sources; installed in a later step
+        ref: main
+
+    - name: Setup Miniforge environment
+      uses: conda-incubator/setup-miniconda@v3
+      with:
+        miniforge-variant: Miniforge3
+        miniforge-version: latest
+        python-version: ${{ matrix.python-version }}
+        use-mamba: true
+        auto-update-conda: true
+        environment-file: .github/environment.yml
+
+    - name: Install numpy ${{ matrix.numpy-version }}
+      run: |
+        mamba install -y numpy=${{ matrix.numpy-version }}
+
+    - name: Install python-pdal
+      run: |
+        pip install -vv . --no-deps --no-build-isolation
+
+    - name: Install python-pdal-plugins
+      working-directory: ./plugins
+      run: pip install -vv . --no-deps --no-build-isolation
+
+    - name: Test
+      run: |
+        export PDAL_DRIVER_PATH=$(python -m pdal --pdal-driver-path)
+        export PDAL_PLUGIN_PATH=$(python -m pdal --pdal-plugin-path)
+        echo "PDAL_DRIVER_PATH $PDAL_DRIVER_PATH"
+        echo "PDAL_PLUGIN_PATH $PDAL_PLUGIN_PATH"
+        export PDAL_DRIVER_PATH=$PDAL_PLUGIN_PATH:$PDAL_DRIVER_PATH
+        python -m pdal
+        pdal --drivers --debug
+        python -m pytest -v test/
+
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 00000000..99b22df6
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,62 @@
+name: Release
+
+on:
+  workflow_dispatch:
+  pull_request:
+    paths:
+      - '.github/workflows/release.yml'
+  push:
+    branches:
+      - main
+    paths:
+      - '.github/workflows/release.yml'
+  release:
+    types:
+      - published
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}  # one active run per workflow and ref
+  cancel-in-progress: true
+
+jobs:
+
+  build_sdist:
+    name: Build source distribution
+    runs-on: ubuntu-latest
+    environment:
+      name: release
+      url: https://pypi.org/p/pdal
+    permissions:
+      id-token: write  # IMPORTANT: this permission is mandatory for trusted publishing
+    strategy:
+      fail-fast: true
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Setup environment
+      uses: conda-incubator/setup-miniconda@v3
+      with:
+        miniforge-variant: Miniforge3
+        miniforge-version: latest
+        python-version: '3.12'
+        use-mamba: true
+        auto-update-conda: true
+        environment-file: .github/environment.yml
+
+    - name: Install build tools and build sdist
+      shell: bash -l {0}
+      run: |
+        python -m pip install build pipx twine
+        pipx run build --sdist -Ccmake.define.CMAKE_BUILD_WITH_INSTALL_RPATH=ON
+
+    - uses: actions/upload-artifact@v4
+      with:
+        name: cibw-sdist
+        path: dist/*.tar.gz
+
+    - name: Publish package distributions to PyPI
+      if: github.event_name == 'release' && github.event.action == 'published'  # upload only for published releases
+      uses: pypa/gh-action-pypi-publish@release/v1
+
+
diff --git a/.gitignore b/.gitignore
index 42f64d0d..4cb02b15 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,11 @@
-pdal/libpdalpython.cpp
 *.pyc
+_skbuild/*
+.vscode/*
 __pycache__
 build/*
 PDAL.egg-info/*
 dist/*
+*.o
+*.so
+*.dylib
+.DS_Store
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 5bc813b1..00000000
--- a/.travis.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-# .travis.yml
-# Configure Travis CI service for http://github.com/PDAL/python
-
-sudo: required
-
-services: docker
-
-before_install:
-  - docker pull pdal/pdal:latest
-
-script:
-  - docker run -v $TRAVIS_BUILD_DIR:/pdal -t pdal/pdal:latest /bin/sh -c "/pdal/scripts/travis/script.sh"
-
diff --git a/CHANGES.txt b/CHANGES.txt
index b278b425..7e38fb77 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,12 +1,112 @@
 Changes
-================================================================================
+--------------------------------------------------------------------------------
+
+
+3.2.3
+................................................................................
+
+* Do not build and include wheels in distro
+
+
+3.2.2
+................................................................................
+
+* Implement move ctor to satisfy MSVC 2019 https://github.com/PDAL/python/commit/667f56bd0ee465f55a14636986e80b0a9cefcf14
+
+
+3.2.1
+................................................................................
+
+* implement #129, add pandas DataFrame i/o for convenience by @hobu in
+  https://github.com/PDAL/python/pull/130
+* harden getMetadata and related calls from getting non-utf-8 'json'  by @hobu
+  in https://github.com/PDAL/python/pull/140
+* ignore DataFrame test if not GeoPandas, give up on Python 3.7 builds by @hobu
+  in https://github.com/PDAL/python/pull/137
+
+3.2.0
+................................................................................
+
+* PDAL base library 2.4.0+ is required
+
+* CMake project name updated to pdal-python
+
+* `srswkt2` property added to allow fetching of SRS info
+
+* pip builds require cmake >= 3.11
+
+* CMAKE_CXX_STANDARD set to c++17 to match PDAL 2.4.x
+
+* Driver and options *actually* uses the library instead of
+  shelling out to `pdal` application :)
+
+* _get_json renamed to toJSON and made public
+
+* Fix #119, 'json' optional kwarg put back for now
+
+* DEVELOPMENT_COMPONENT in CMake FindPython skipped on OSX
+
+* Make sure 'type' gets set when serializing to JSON
+
+3.1.0
+................................................................................
+
+* **Breaking change** – pipeline.metadata now returns a dictionary from
+  json.loads instead of a string.
+
+* pipeline.quickinfo will fetch the PDAL preview() information for a data source.
+  You can use this to fetch header or other information without reading data.
+  https://github.com/PDAL/python/pull/109
+
+* PDAL driver and option collection now uses the PDAL library directly rather
+  than shelling out to the pdal command https://github.com/PDAL/python/pull/107
+
+* Pipelines now support pickling for use with things like Dask
+  https://github.com/PDAL/python/pull/110
+
+
+
+3.0.0
+................................................................................
+
+* Pythonic pipeline creation https://github.com/PDAL/python/pull/91
+
+* Support streaming pipeline execution https://github.com/PDAL/python/pull/94
+
+* Replace Cython with PyBind11 https://github.com/PDAL/python/pull/102
+
+* Remove pdal.pio module https://github.com/PDAL/python/pull/101
+
+* Move readers.numpy and filters.python to separate repository https://github.com/PDAL/python/pull/104
+
+* Miscellaneous refactorings and cleanups
+
+2.3.5
+................................................................................
+
+* Fix memory leak https://github.com/PDAL/python/pull/74
+
+* Handle metadata with invalid unicode by erroring https://github.com/PDAL/python/pull/74
+
+2.3.0
+................................................................................
+
+* PDAL Python support 2.3.0 requires PDAL 2.1+. Older PDAL base libraries
+  likely will not work.
+
+* Python support built using scikit-build
+
+* readers.numpy and filters.python are installed along with the extension.
+
+* Pipeline can take in a list of arrays that are passed to readers.numpy
+
+* readers.numpy now supports functions that return arrays. See
+  https://pdal.io/stages/readers.numpy.html for more detail.
 
 2.0.0
---------------------------------------------------------------------------------
+................................................................................
 
 * PDAL Python extension is now in its own repository on its own release
   schedule at https://github.com/PDAL/python
 
 * Extension now builds and works under PDAL OSGeo4W64 on Windows.
-
-
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 00000000..61610cfb
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,41 @@
+cmake_minimum_required(VERSION 3.16.0)
+project(pdal-python VERSION ${SKBUILD_PROJECT_VERSION}
+                    DESCRIPTION "PDAL Python bindings"
+                    HOMEPAGE_URL "https://github.com/PDAL/Python")
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+# Python-finding settings
+set(Python3_FIND_STRATEGY "LOCATION")
+set(Python3_FIND_REGISTRY "LAST")
+set(Python3_FIND_FRAMEWORK "LAST")
+
+# Development vs. Development.Module
+# https://cmake.org/cmake/help/latest/module/FindPython3.html?highlight=Development.Module
+if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18.0" AND NOT APPLE)
+    set(DEVELOPMENT_COMPONENT "Development.Module")
+else()
+    set(DEVELOPMENT_COMPONENT "Development")
+endif()
+
+# find Python3 (interpreter, the development component chosen above, and NumPy)
+find_package(Python3 COMPONENTS Interpreter ${DEVELOPMENT_COMPONENT} NumPy REQUIRED)
+
+# find PDAL. Require 2.7+
+find_package(PDAL 2.7 REQUIRED)
+
+# find PyBind11
+find_package(pybind11 REQUIRED)
+
+set(extension "libpdalpython")  # name of the pybind11 module target built below
+pybind11_add_module(${extension} MODULE
+    src/pdal/PyArray.cpp
+    src/pdal/PyPipeline.cpp
+    src/pdal/StreamableExecutor.cpp
+    src/pdal/libpdalpython.cpp
+)
+target_include_directories(${extension} PRIVATE ${Python3_NumPy_INCLUDE_DIRS})
+target_link_libraries(${extension} PRIVATE ${PDAL_LIBRARIES})
+install(TARGETS ${extension} LIBRARY DESTINATION "pdal")  # installed under the "pdal" directory
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 57a62d95..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,6 +0,0 @@
-exclude *.txt
-exclude MANIFEST.in
-include CHANGES.txt README.rst
-recursive-include test *.py
-recursive-include pdal *.pyx
-recursive-include pdal *.hpp
diff --git a/PKG-INFO b/PKG-INFO
deleted file mode 100644
index 7d2bfc03..00000000
--- a/PKG-INFO
+++ /dev/null
@@ -1,75 +0,0 @@
-Metadata-Version: 1.1
-Name: PDAL
-Version: 1.6.0
-Summary: Point cloud data processing
-Home-page: http://pdal.io
-Author: Howard Butler
-Author-email: howard@hobu.co
-License: BSD
-Description: ================================================================================
-        PDAL
-        ================================================================================
-        
-        The PDAL Python extension allows you to process data with PDAL into `Numpy`_
-        arrays. Additionally, you can use it to fetch `schema`_ and `metadata`_ from
-        PDAL operations.
-        
-        Usage
-        --------------------------------------------------------------------------------
-        
-        Given the following pipeline, which simply reads an `ASPRS LAS`_ file and
-        sorts it by the ``X`` dimension:
-        
-        .. _`ASPRS LAS`: https://www.asprs.org/committee-general/laser-las-file-format-exchange-activities.html
-        
-        .. code-block:: python
-        
-        
-            json = """
-            {
-              "pipeline": [
-                "1.2-with-color.las",
-                {
-                    "type": "filters.sort",
-                    "dimension": "X"
-                }
-              ]
-            }"""
-        
-            import pdal
-            pipeline = pdal.Pipeline(json)
-            pipeline.validate() # check if our JSON and options were good
-            pipeline.loglevel = 9 #really noisy
-            count = pipeline.execute()
-            arrays = pipeline.arrays
-            metadata = pipeline.metadata
-            log = pipeline.log
-        
-        
-        .. _`Numpy`: http://www.numpy.org/
-        .. _`schema`: http://www.pdal.io/dimensions.html
-        .. _`metadata`: http://www.pdal.io/development/metadata.html
-        
-        Requirements
-        ================================================================================
-        
-        * PDAL 1.4+
-        * Python >=2.7 (including Python 3.x)
-        
-        
-        
-        Changes
-        ================================================================================
-        
-        
-Keywords: point cloud spatial
-Platform: UNKNOWN
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Intended Audience :: Developers
-Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: BSD License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python :: 2.7
-Classifier: Programming Language :: Python :: 3
-Classifier: Topic :: Scientific/Engineering :: GIS
-Requires: Python (>=2.7)
diff --git a/README.rst b/README.rst
index 7203e881..ab90fd28 100644
--- a/README.rst
+++ b/README.rst
@@ -2,17 +2,52 @@
 PDAL
 ================================================================================
 
-The PDAL Python extension allows you to process data with PDAL into `Numpy`_
-arrays. Additionally, you can use it to fetch `schema`_ and `metadata`_ from
-PDAL operations.
+PDAL Python support allows you to process data with PDAL into `Numpy`_ arrays.
+It provides a PDAL extension module to control Python interaction with PDAL.
+Additionally, you can use it to fetch `schema`_ and `metadata`_ from PDAL operations.
+
+Installation
+--------------------------------------------------------------------------------
+
+**Note** The PDAL Python bindings require the PDAL base library to be installed. Source code can be found at https://pdal.io and `GitHub <https://github.com/PDAL/PDAL>`__.
+
+PyPI
+................................................................................
+
+PDAL Python support is installable via PyPI:
+
+.. code-block::
+
+    pip install PDAL
+
+
+Developers can control many settings including debug builds and where the libraries are installed
+using `scikit-build-core <https://scikit-build-core.readthedocs.io>`_ settings:
+
+.. code-block::
+
+    python -m pip install \
+        -Cbuild-dir=build \
+        -e \
+        . \
+        --config-settings=cmake.build-type="Debug" \
+        -vv \
+        --no-deps \
+        --no-build-isolation
+
+GitHub
+................................................................................
 
 The repository for PDAL's Python extension is available at https://github.com/PDAL/python
 
-It is released independently from PDAL itself as of PDAL 1.7.
+Python support is released independently from PDAL itself as of PDAL 1.7.
 
 Usage
 --------------------------------------------------------------------------------
 
+Simple
+................................................................................
+
 Given the following pipeline, which simply reads an `ASPRS LAS`_ file and
 sorts it by the ``X`` dimension:
 
@@ -34,30 +69,430 @@ sorts it by the ``X`` dimension:
 
     import pdal
     pipeline = pdal.Pipeline(json)
-    pipeline.validate() # check if our JSON and options were good
-    pipeline.loglevel = 8 #really noisy
     count = pipeline.execute()
     arrays = pipeline.arrays
     metadata = pipeline.metadata
     log = pipeline.log
 
+Programmatic Pipeline Construction
+................................................................................
+
+The previous example specified the pipeline as a JSON string. Alternatively, a
+pipeline can be constructed by creating ``Stage`` instances and piping them
+together. For example, the previous pipeline can be specified as:
+
+.. code-block:: python
+
+    pipeline = pdal.Reader("1.2-with-color.las") | pdal.Filter.sort(dimension="X")
+
+Stage Objects
+=============
+
+- A stage is an instance of ``pdal.Reader``, ``pdal.Filter`` or ``pdal.Writer``.
+- A stage can be instantiated by passing as keyword arguments the options
+  applicable to the respective PDAL stage. For more on PDAL stages and their
+  options, check the PDAL documentation on `Stage Objects <https://pdal.io/pipeline.html#stage-objects>`__.
+
+  - The ``filename`` option of ``Readers`` and ``Writers`` as well as the ``type``
+    option of ``Filters`` can be passed positionally as the first argument.
+  - The ``inputs`` option specifies a sequence of stages to be set as input to the
+    current stage. Each input can be either the string tag of another stage, or
+    the ``Stage`` instance itself.
+- The ``Reader``, ``Filter`` and ``Writer`` classes come with static methods for
+  all the respective PDAL drivers. For example, ``pdal.Filter.head()`` is a
+  shortcut for ``pdal.Filter(type="filters.head")``. These methods are
+  auto-generated by introspecting ``pdal`` and the available options are
+  included in each method's docstring:
+
+.. code-block::
+
+    >>> help(pdal.Filter.head)
+    Help on function head in module pdal.pipeline:
+
+    head(**kwargs)
+        Return N points from beginning of the point cloud.
+
+        user_data: User JSON
+        log: Debug output filename
+        option_file: File from which to read additional options
+        where: Expression describing points to be passed to this filter
+        where_merge='auto': If 'where' option is set, describes how skipped points should be merged with kept points in standard mode.
+        count='10': Number of points to return from beginning.  If 'invert' is true, number of points to drop from the beginning.
+        invert='false': If true, 'count' specifies the number of points to skip from the beginning.
+
+Pipeline Objects
+================
+
+A ``pdal.Pipeline`` instance can be created from:
+
+- a JSON string: ``Pipeline(json_string)``
+- a sequence of ``Stage`` instances: ``Pipeline([stage1, stage2])``
+- a single ``Stage`` with the ``Stage.pipeline`` method: ``stage.pipeline()``
+- nothing: ``Pipeline()`` creates a pipeline with no stages.
+- joining ``Stage`` and/or other ``Pipeline`` instances together with the pipe
+  operator (``|``):
+
+  - ``stage1 | stage2``
+  - ``stage1 | pipeline1``
+  - ``pipeline1 | stage1``
+  - ``pipeline1 | pipeline2``
+
+Every application of the pipe operator creates a new ``Pipeline`` instance. To
+update an existing ``Pipeline`` use the respective in-place pipe operator (``|=``):
+
+.. code-block:: python
+
+    # update pipeline in-place
+    pipeline = pdal.Pipeline()
+    pipeline |= stage
+    pipeline |= pipeline2
+
+Reading using Numpy Arrays
+................................................................................
+
+The following more complex scenario demonstrates the full cycling between
+PDAL and Python:
+
+* Read a small testfile from GitHub into a Numpy array
+* Filter the array with Numpy for Intensity
+* Pass the filtered array to PDAL to be filtered again
+* Write the final filtered array to a LAS file and a TileDB_ array
+  via the `TileDB-PDAL integration`_ using the `TileDB writer plugin`_
+
+.. code-block:: python
+
+    import pdal
+
+    data = "https://github.com/PDAL/PDAL/blob/master/test/data/las/1.2-with-color.las?raw=true"
+
+    pipeline = pdal.Reader.las(filename=data).pipeline()
+    print(pipeline.execute())  # 1065 points
+
+    # Get the data from the first array
+    # [array([(637012.24, 849028.31, 431.66, 143, 1,
+    # 1, 1, 0, 1,  -9., 132, 7326, 245380.78254963,  68,  77,  88),
+    # dtype=[('X', '<f8'), ('Y', '<f8'), ('Z', '<f8'), ('Intensity', '<u2'),
+    # ('ReturnNumber', 'u1'), ('NumberOfReturns', 'u1'), ('ScanDirectionFlag', 'u1'),
+    # ('EdgeOfFlightLine', 'u1'), ('Classification', 'u1'), ('ScanAngleRank', '<f4'),
+    # ('UserData', 'u1'), ('PointSourceId', '<u2'),
+    # ('GpsTime', '<f8'), ('Red', '<u2'), ('Green', '<u2'), ('Blue', '<u2')])
+    arr = pipeline.arrays[0]
+
+    # Filter out entries that have intensity <= 30
+    intensity = arr[arr["Intensity"] > 30]
+    print(len(intensity))  # 704 points
+
+    # Now use pdal to clamp points that have intensity 100 <= v < 300
+    pipeline = pdal.Filter.expression(expression="Intensity >= 100 && Intensity < 300").pipeline(intensity)
+    print(pipeline.execute())  # 387 points
+    clamped = pipeline.arrays[0]
+
+    # Write our intensity data to a LAS file and a TileDB array. For TileDB it is
+    # recommended to use Hilbert ordering by default with geospatial point cloud data,
+    # which requires specifying a domain extent. This can be determined automatically
+    # from a stats filter that computes statistics about each dimension (min, max, etc.).
+    pipeline = pdal.Writer.las(
+        filename="clamped.las",
+        offset_x="auto",
+        offset_y="auto",
+        offset_z="auto",
+        scale_x=0.01,
+        scale_y=0.01,
+        scale_z=0.01,
+    ).pipeline(clamped)
+    pipeline |= pdal.Filter.stats() | pdal.Writer.tiledb(array_name="clamped")
+    print(pipeline.execute())  # 387 points
+
+    # Dump the TileDB array schema
+    import tiledb
+    with tiledb.open("clamped") as a:
+        print(a.schema)
+
+Reading using Numpy Arrays as buffers (advanced)
+................................................................................
+
+It's also possible to treat the Numpy arrays passed to PDAL as buffers that are iteratively populated through
+custom python functions during the execution of the pipeline.
+
+This may be useful in cases where you want the reading of the input data to be handled in a streamable fashion,
+like for example:
+
+* When the total Numpy array data wouldn't fit into memory.
+* To initiate execution of a streamable PDAL pipeline while the input data is still being read.
+
+To enable this mode, you just need to include the python populate function along with each corresponding Numpy array.
+
+.. code-block:: python
+
+    # Numpy array to be used as buffer
+    in_buffer = np.zeros(max_chunk_size, dtype=[("X", float), ("Y", float), ("Z", float)])
+
+    # The function to populate the buffer iteratively
+    def load_next_chunk() -> int:
+        """
+        Function called by PDAL before reading the data from the buffer.
+
+        IMPORTANT: must return the total number of items to be read from the buffer.
+        The Pipeline execution will keep calling this function in a loop until 0 is returned.
+        """
+        #
+        # Replace here with your code that populates the buffer and returns the number of elements to read
+        #
+        chunk_size = next_chunk.size
+        in_buffer[:chunk_size]["X"] = next_chunk[:]["X"]
+        in_buffer[:chunk_size]["Y"] = next_chunk[:]["Y"]
+        in_buffer[:chunk_size]["Z"] = next_chunk[:]["Z"]
+
+        return chunk_size
+
+    # Configure input array and handler during Pipeline initialization...
+    p = pdal.Pipeline(pipeline_json, arrays=[in_buffer], stream_handlers=[load_next_chunk])
+
+    # ...alternatively you can use the setter on an existing Pipeline
+    # p.inputs = [(in_buffer, load_next_chunk)]
+
+The following snippet provides a simple example of how to use a Numpy array as buffer to support writing through PDAL
+with total control over the maximum amount of memory to use.
+
+Example: Streaming the read and write of a very large LAZ file with low memory footprint
+.........................................................................................
+
+
+
+.. code-block:: python
+
+    import numpy as np
+    import pdal
+
+    in_chunk_size = 10_000_000
+    in_pipeline = pdal.Reader.las(**{
+        "filename": "in_test.laz"
+    }).pipeline()
+
+    in_pipeline_it = in_pipeline.iterator(in_chunk_size).__iter__()
+
+    out_chunk_size = 50_000_000
+    out_file = "out_test.laz"
+    out_pipeline = pdal.Writer.las(
+        filename=out_file
+    ).pipeline()
+
+    out_buffer = np.zeros(in_chunk_size, dtype=[("X", float), ("Y", float), ("Z", float)])
+
+    def load_next_chunk():
+        try:
+            next_chunk = next(in_pipeline_it)
+        except StopIteration:
+            # Stops the streaming
+            return 0
+
+        chunk_size = next_chunk.size
+        out_buffer[:chunk_size]["X"] = next_chunk[:]["X"]
+        out_buffer[:chunk_size]["Y"] = next_chunk[:]["Y"]
+        out_buffer[:chunk_size]["Z"] = next_chunk[:]["Z"]
+
+        print(f"Loaded next chunk -> {chunk_size}")
+
+        return chunk_size
+
+    out_pipeline.inputs = [(out_buffer, load_next_chunk)]
+
+    out_pipeline.loglevel = 20 # INFO
+    count = out_pipeline.execute_streaming(out_chunk_size)
+
+    print(f"\nWROTE - {count}")
+
+
+
+Executing Streamable Pipelines
+................................................................................
+Streamable pipelines (pipelines that consist exclusively of streamable PDAL
+stages) can be executed in streaming mode via ``Pipeline.iterator()``. This
+returns an iterator object that yields Numpy arrays of up to ``chunk_size`` size
+(default=10000) at a time.
+
+.. code-block:: python
+
+    import pdal
+    pipeline = pdal.Reader("test/data/autzen-utm.las") | pdal.Filter.expression(expression="Intensity > 80 && Intensity < 120")
+    for array in pipeline.iterator(chunk_size=500):
+        print(len(array))
+    # or to concatenate all arrays into one
+    # full_array = np.concatenate(list(pipeline))
+
+``Pipeline.iterator()`` also takes an optional ``prefetch`` parameter (default=0)
+to allow prefetching up to this number of arrays in parallel and buffering
+them until they are yielded to the caller.
+
+If you just want to execute a streamable pipeline in streaming mode and don't
+need to access the data points (typically when the pipeline has Writer stage(s)),
+you can use the ``Pipeline.execute_streaming(chunk_size)`` method instead. This
+is functionally equivalent to ``sum(map(len, pipeline.iterator(chunk_size)))``
+but more efficient as it avoids allocating and filling any arrays in memory.
+
+Accessing Mesh Data
+................................................................................
+
+Some PDAL stages (for instance ``filters.delaunay``) create TIN type mesh data.
+
+This data can be accessed in Python using the ``Pipeline.meshes`` property, which returns a ``numpy.ndarray``
+of shape (1,n) where n is the number of Triangles in the mesh.
+
+If the PointView contains no mesh data, then n = 0.
+
+Each Triangle is a tuple ``(A,B,C)`` where A, B and C are indices into the PointView identifying the point that is the vertex for the Triangle.
+
+Meshio Integration
+................................................................................
+
+The meshes property provides the face data but is not easy to use as a mesh. Therefore, we have provided optional integration
+into the `Meshio <https://github.com/nschloe/meshio>`__ library.
+
+The ``pdal.Pipeline`` class provides the ``get_meshio(idx: int) -> meshio.Mesh`` method. This
+method creates a ``Mesh`` object from the ``PointView`` array and mesh properties.
+
+.. note:: The meshio integration requires that meshio is installed (e.g. ``pip install meshio``). If it is not, then the method fails with an informative RuntimeError.
+
+Simple use of the functionality could be as follows:
+
+.. code-block:: python
+
+    import pdal
+
+    ...
+    pl = pdal.Pipeline(pipeline)
+    pl.execute()
+
+    mesh = pl.get_meshio(0)
+    mesh.write('test.obj')
+
+Advanced Mesh Use Case
+................................................................................
+
+USE-CASE : Take a LiDAR map, create a mesh from the ground points, split into tiles and store the tiles in PostGIS.
+
+.. note:: Like ``Pipeline.arrays``, ``Pipeline.meshes`` returns a list of ``numpy.ndarray`` to provide for the case where the output from a Pipeline is multiple PointViews
+
+(example using 1.2-with-color.las and not doing the ground classification for clarity)
+
+.. code-block:: python
+
+    import pdal
+    import psycopg2
+    import io
+
+    pl = (
+        pdal.Reader(".../python/test/data/1.2-with-color.las")
+        | pdal.Filter.splitter(length=1000)
+        | pdal.Filter.delaunay()
+    )
+    pl.execute()
+
+    conn = psycopg2.connect(%CONNECTION_STRING%)
+    buffer = io.StringIO()
+
+    for idx in range(len(pl.meshes)):
+        m =  pl.get_meshio(idx)
+        if m:
+            m.write(buffer,  file_format = "wkt")
+            with conn.cursor() as curr:
+              curr.execute(
+                  "INSERT INTO %table-name% (mesh) VALUES (ST_GeomFromEWKT(%(ewkt)s))",
+                  { "ewkt": buffer.getvalue()}
+              )
+
+    conn.commit()
+    conn.close()
+    buffer.close()
+
+
+Digital Terrain Model (DTM) Creation Example
+................................................................................
+
+The following is a script sample that can be used to create a DTM from a PDAL-
+readable pointcloud.
+
+Method:
+
+#. read point cloud file
+#. remove noise
+#. clean up invalid values
+#. classify ground points using `SMRF <https://pdal.io/en/2.9.2/stages/filters.smrf.html>`__
+#. write with `GDAL writer <https://pdal.io/en/2.9.2/stages/writers.gdal.html>`__
+
+.. note:: If your pointcloud already has ground classified, you can skip all but
+    the reader and writer and achieve the same result.
+
+.. code-block:: python
+
+    import pdal
+
+    pc_path = 'https://github.com/PDAL/data/raw/refs/heads/main/autzen/autzen.laz'
+    out_file = 'autzen_dtm.tif'
+
+
+    # read
+    reader = pdal.Reader.las(pc_path)
+
+    # remove noisy points
+    lownoise_filter = pdal.Filter.range(
+        limits='Classification![7:7]', tag='lownoise'
+    )
+    highnoise_filter = pdal.Filter.range(
+        limits='Classification![18:]', tag='highnoise'
+    )
+
+    # saving incorrectly labeled returns here, some people want this, some don't
+    prepare_ground = pdal.Filter.assign(
+        value=[
+            'Classification=0',
+            'ReturnNumber=1 WHERE ReturnNumber < 1',
+            'NumberOfReturns=1 WHERE NumberOfReturns < 1',
+        ],
+        tag='prepare_ground_classifier',
+    )
+
+    # classify ground
+    smrf_classifier = pdal.Filter.smrf(tag='ground_classifier')
+
+    # write with gdal, resolution in feet for autzen
+    gdal_writer = pdal.Writer.gdal(
+        filename=out_file,
+        where='Classification == 2',
+        data_type='float32',
+        resolution=10,
+        output_type='idw',
+        window_size=3,
+        pdal_metadata=True,
+    )
+
+    # collect pdal stages and execute pipeline
+    pipeline = (
+        reader
+        | lownoise_filter
+        | highnoise_filter
+        | prepare_ground
+        | smrf_classifier
+        | gdal_writer
+    )
+    pipeline.execute()
+
 
 .. _`Numpy`: http://www.numpy.org/
 .. _`schema`: http://www.pdal.io/dimensions.html
 .. _`metadata`: http://www.pdal.io/development/metadata.html
+.. _`TileDB`: https://tiledb.com/
+.. _`TileDB-PDAL integration`: https://docs.tiledb.com/geospatial/pdal
+.. _`TileDB writer plugin`: https://pdal.io/stages/writers.tiledb.html
 
-
-.. image:: https://travis-ci.org/PDAL/python.svg?branch=master
-    :target: https://travis-ci.org/PDAL/python
-
-.. image:: https://ci.appveyor.com/api/projects/status/of4kecyahpo8892d
-   :target: https://ci.appveyor.com/project/hobu/python/
+.. image:: https://github.com/PDAL/python/workflows/Build/badge.svg
+   :target: https://github.com/PDAL/python/actions?query=workflow%3ABuild
 
 Requirements
 ================================================================================
 
-* PDAL 1.7+
-* Python >=2.7 (including Python 3.x)
-* Cython (eg :code:`pip install cython`)
-* Packaging (eg :code:`pip install packaging`)
-
+* PDAL 2.7+
+* Python >=3.9
+* Pybind11 (eg :code:`pip install pybind11[global]`)
+* Numpy >= 1.22 (eg :code:`pip install numpy`)
+* scikit-build-core (eg :code:`pip install scikit-build-core`)
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
deleted file mode 100644
index 7677b24b..00000000
--- a/azure-pipelines.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-jobs:
-  - template: ./scripts/azp/linux.yml
-  - template: ./scripts/azp/win.yml
-  - template: ./scripts/azp/osx.yml
-
diff --git a/debug.sh b/debug.sh
deleted file mode 100755
index 80f394b2..00000000
--- a/debug.sh
+++ /dev/null
@@ -1 +0,0 @@
-DYLD_LIBRARY_PATH=/Users/hobu/pdal-build/lib /Applications/Xcode.app/Contents/Developer/usr/bin/lldb -- python3 setup.py test
diff --git a/pdal/PyPipeline.cpp b/pdal/PyPipeline.cpp
deleted file mode 100644
index 66c613e3..00000000
--- a/pdal/PyPipeline.cpp
+++ /dev/null
@@ -1,178 +0,0 @@
-/******************************************************************************
-* Copyright (c) 2016, Howard Butler (howard@hobu.co)
-*
-* All rights reserved.
-*
-* Redistribution and use in source and binary forms, with or without
-* modification, are permitted provided that the following
-* conditions are met:
-*
-*     * Redistributions of source code must retain the above copyright
-*       notice, this list of conditions and the following disclaimer.
-*     * Redistributions in binary form must reproduce the above copyright
-*       notice, this list of conditions and the following disclaimer in
-*       the documentation and/or other materials provided
-*       with the distribution.
-*     * Neither the name of Hobu, Inc. or Flaxen Geo Consulting nor the
-*       names of its contributors may be used to endorse or promote
-*       products derived from this software without specific prior
-*       written permission.
-*
-* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
-* OF SUCH DAMAGE.
-****************************************************************************/
-
-#include "PyPipeline.hpp"
-
-#ifndef _WIN32
-#include <dlfcn.h>
-#endif
-
-#include <Python.h>
-#include <numpy/arrayobject.h>
-
-#include <pdal/Stage.hpp>
-#include <pdal/pdal_features.hpp>
-
-#include "PyArray.hpp"
-
-namespace pdal
-{
-namespace python
-{
-
-// Create a pipeline for writing data to PDAL
-Pipeline::Pipeline(std::string const& json, std::vector<Array*> arrays) :
-    m_executor(new PipelineExecutor(json))
-{
-#ifndef _WIN32
-    // See comment in alternate constructor below.
-    ::dlopen("libpdal_base.so", RTLD_NOLOAD | RTLD_GLOBAL);
-#endif
-
-    if (_import_array() < 0)
-        throw pdal_error("Could not impory numpy.core.multiarray.");
-
-    PipelineManager& manager = m_executor->getManager();
-
-    std::stringstream strm(json);
-    manager.readPipeline(strm);
-    std::vector<Stage *> roots = manager.roots();
-    if (roots.size() != 1)
-        throw pdal_error("Filter pipeline must contain a single root stage.");
-
-    for (auto array : arrays)
-    {
-        // Create numpy reader for each array
-        // Options
-
-        Options options;
-        options.add("order", array->rowMajor() ?
-            MemoryViewReader::Order::RowMajor :
-            MemoryViewReader::Order::ColumnMajor);
-        options.add("shape", MemoryViewReader::Shape(array->shape()));
-
-        Stage& s = manager.makeReader("", "readers.memoryview", options);
-        MemoryViewReader& r = dynamic_cast<MemoryViewReader &>(s);
-        for (auto f : array->fields())
-            r.pushField(f);
-
-        ArrayIter& iter = array->iterator();
-        auto incrementer = [&iter](PointId id) -> char *
-        {
-            if (! iter)
-                return nullptr;
-
-            char *c = *iter;
-            ++iter;
-            return c;
-        };
-
-        r.setIncrementer(incrementer);
-        PyObject* parray = (PyObject*)array->getPythonArray();
-        if (!parray)
-            throw pdal_error("array was none!");
-
-        roots[0]->setInput(r);
-    }
-
-    manager.validateStageOptions();
-}
-
-// Create a pipeline for reading data from PDAL
-Pipeline::Pipeline(std::string const& json) :
-    m_executor(new PipelineExecutor(json))
-{
-    // Make the symbols in pdal_base global so that they're accessible
-    // to PDAL plugins.  Python dlopen's this extension with RTLD_LOCAL,
-    // which means that without this, symbols in libpdal_base aren't available
-    // for resolution of symbols on future runtime linking.  This is an issue
-    // on Alpine and other Linux variants that don't use UNIQUE symbols
-    // for C++ template statics only.  Without this, you end up with multiple
-    // copies of template statics.
-#ifndef _WIN32
-    ::dlopen("libpdal_base.so", RTLD_NOLOAD | RTLD_GLOBAL);
-#endif
-    if (_import_array() < 0)
-        throw pdal_error("Could not impory numpy.core.multiarray.");
-}
-
-Pipeline::~Pipeline()
-{}
-
-
-void Pipeline::setLogLevel(int level)
-{
-    m_executor->setLogLevel(level);
-}
-
-
-int Pipeline::getLogLevel() const
-{
-    return static_cast<int>(m_executor->getLogLevel());
-}
-
-
-int64_t Pipeline::execute()
-{
-    return m_executor->execute();
-}
-
-bool Pipeline::validate()
-{
-    auto res =  m_executor->validate();
-    return res;
-}
-
-std::vector<Array *> Pipeline::getArrays() const
-{
-    std::vector<Array *> output;
-
-    if (!m_executor->executed())
-        throw python_error("call execute() before fetching arrays");
-
-    const PointViewSet& pvset = m_executor->getManagerConst().views();
-
-    for (auto i: pvset)
-    {
-        //ABELL - Leak?
-        Array *array = new python::Array;
-        array->update(i);
-        output.push_back(array);
-    }
-    return output;
-}
-
-} // namespace python
-} // namespace pdal
-
diff --git a/pdal/__init__.py b/pdal/__init__.py
deleted file mode 100644
index 4d775cb6..00000000
--- a/pdal/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-__version__='2.2.2'
-
-from .pipeline import Pipeline
-from .array import Array
-from .dimension import dimensions
-
-from pdal.libpdalpython import getVersionString, getVersionMajor, getVersionMinor, getVersionPatch, getSha1, getDebugInformation, getPluginInstallPath
-
-class Info(object):
-    version = getVersionString()
-    major = getVersionMajor()
-    minor = getVersionMinor()
-    patch = getVersionPatch()
-    debug = getDebugInformation()
-    sha1 = getSha1()
-    plugin = getPluginInstallPath()
-
-info = Info()
diff --git a/pdal/array.py b/pdal/array.py
deleted file mode 100644
index d9859278..00000000
--- a/pdal/array.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import numpy as np
-from pdal import libpdalpython
-
-class Array(object):
-    """A Numpy Array that can speak PDAL"""
-
-    def __init__(self, data):
-        self.p = libpdalpython.PyArray(data)
diff --git a/pdal/dimension.py b/pdal/dimension.py
deleted file mode 100644
index 867609b1..00000000
--- a/pdal/dimension.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import numpy as np
-from pdal import libpdalpython
-from pdal.libpdalpython import getDimensions
-
-dimensions = getDimensions()
-
diff --git a/pdal/libpdalpython.pyx b/pdal/libpdalpython.pyx
deleted file mode 100644
index 690b0de1..00000000
--- a/pdal/libpdalpython.pyx
+++ /dev/null
@@ -1,170 +0,0 @@
-# distutils: language = c++
-# cython: c_string_type=unicode, c_string_encoding=utf8
-
-from libcpp.vector cimport vector
-from libcpp.string cimport string
-from libc.stdint cimport uint32_t, int64_t
-from libcpp cimport bool
-from cpython.version cimport PY_MAJOR_VERSION
-cimport numpy as np
-np.import_array()
-
-from cpython cimport PyObject, Py_INCREF
-from cython.operator cimport dereference as deref, preincrement as inc
-
-cdef extern from "pdal/pdal_config.hpp" namespace "pdal::Config":
-    cdef int versionMajor() except +
-    cdef int versionMinor() except +
-    cdef int versionPatch() except +
-    cdef string sha1() except+
-    cdef string debugInformation() except+
-    cdef string pluginInstallPath() except+
-    cdef string versionString() except+
-
-def getVersionString():
-    return versionString()
-def getVersionMajor():
-    return versionMajor()
-def getVersionMinor():
-    return versionMinor()
-def getVersionPatch():
-    return versionPatch()
-def getSha1():
-    return sha1()
-def getDebugInformation():
-    return debugInformation()
-def getPluginInstallPath():
-    return pluginInstallPath()
-
-cdef extern from "PyArray.hpp" namespace "pdal::python":
-    cdef cppclass Array:
-        Array(np.ndarray) except +
-        void *getPythonArray() except+
-
-cdef extern from "PyPipeline.hpp" namespace "pdal::python":
-    cdef cppclass Pipeline:
-        Pipeline(const char* ) except +
-        Pipeline(const char*, vector[Array*]& ) except +
-        int64_t execute() except +
-        bool validate() except +
-        string getPipeline() except +
-        string getMetadata() except +
-        string getSchema() except +
-        string getLog() except +
-        vector[Array*] getArrays() except +
-        int getLogLevel()
-        void setLogLevel(int)
-
-cdef class PyArray:
-    cdef Array *thisptr
-    def __cinit__(self, np.ndarray array):
-        self.thisptr = new Array(array)
-    def __dealloc__(self):
-        del self.thisptr
-
-cdef extern from "PyDimension.hpp":
-    ctypedef struct Dimension:
-        string name;
-        string description;
-        int size;
-        string type;
-##         string units; // Not defined by PDAL yet
-
-    cdef vector[Dimension] getValidDimensions() except +
-
-
-def getDimensions():
-        cdef vector[Dimension] c_dims;
-        c_dims = getValidDimensions()
-        output = []
-        cdef vector[Dimension].iterator it = c_dims.begin()
-        while it != c_dims.end():
-            ptr = deref(it)
-            d = {}
-            d['name'] = ptr.name
-            d['description'] = ptr.description
-            kind = ptr.type + str(ptr.size)
-            d['dtype'] = np.dtype(kind)
-            ptr = deref(it)
-            output.append(d)
-            inc(it)
-        return output
-
-
-cdef class PyPipeline:
-    cdef Pipeline *thisptr      # hold a c++ instance which we're wrapping
-
-
-    def __cinit__(self, unicode json, list arrays=None):
-        cdef char* x = NULL
-        cdef int n_arrays;
-        if arrays:
-            n_arrays = len(arrays)
-
-        cdef vector[Array*] c_arrays;
-        cdef np.ndarray np_array;
-        cdef Array* a
-
-        if arrays is not None:
-            for array in arrays:
-                a = new Array(array)
-                c_arrays.push_back(a)
-
-            self.thisptr = new Pipeline(json.encode('UTF-8'), c_arrays)
-        else:
-            self.thisptr = new Pipeline(json.encode('UTF-8'))
-
-    def __dealloc__(self):
-        del self.thisptr
-
-    property pipeline:
-        def __get__(self):
-            return self.thisptr.getPipeline()
-
-    property metadata:
-        def __get__(self):
-            return self.thisptr.getMetadata()
-
-    property loglevel:
-        def __get__(self):
-            return self.thisptr.getLogLevel()
-        def __set__(self, v):
-            self.thisptr.setLogLevel(v)
-
-    property log:
-        def __get__(self):
-
-            return self.thisptr.getLog()
-
-    property schema:
-        def __get__(self):
-            import json
-
-            j = self.thisptr.getSchema()
-            return json.loads(j)
-
-    property arrays:
-
-        def __get__(self):
-            v = self.thisptr.getArrays()
-            output = []
-            cdef vector[Array*].iterator it = v.begin()
-            cdef Array* a
-            while it != v.end():
-                ptr = deref(it)
-                a = ptr#.get()
-                o = a.getPythonArray()
-                output.append(<object>o)
-                inc(it)
-            return output
-
-
-    def execute(self):
-        if not self.thisptr:
-            raise Exception("C++ Pipeline object not constructed!")
-        return self.thisptr.execute()
-
-    def validate(self):
-        if not self.thisptr:
-            raise Exception("C++ Pipeline object not constructed!")
-        return self.thisptr.validate()
diff --git a/pdal/pio.py b/pdal/pio.py
deleted file mode 100644
index fbbc15e0..00000000
--- a/pdal/pio.py
+++ /dev/null
@@ -1,145 +0,0 @@
-"""
-This module provides a python-syntax interface for constructing and executing pdal-python json
-pipelines.  The API is not explicitly defined but stage names are validated against the pdal executable's drivers when possible.
-
-To construct pipeline stages, access the driver name from this module.  This will create
-a callable function where driver parameters can be specified as keyword arguments.  For example:
-
->>> from pdal import pio
->>> las_reader = pio.readers.las(filename="test.las")
-
-To construct a pipeline, sum stages together.
-
->>> pipeline = pio.readers.las(filename="test.las") + pio.writers.ply(filename="test.ply")
-
-To execute a pipeline and return results, call `execute`.
-
->>> arr = pipeline.execute() # returns a numpy structured array
-
-To access the pipelines as a dict (which may be dumped to json), call `spec`.
-
->>> json.dumps(pipeline.spec)
-
-"""
-
-import types
-import json
-import subprocess
-from functools import partial
-from collections import defaultdict
-from itertools import chain
-import copy
-import warnings
-
-import pdal
-
-try:
-    PDAL_DRIVERS_JSON = subprocess.run(["pdal", "--drivers", "--showjson"], capture_output=True).stdout
-    PDAL_DRIVERS = json.loads(PDAL_DRIVERS_JSON)
-    _PDAL_VALIDATE = True
-except:
-    PDAL_DRIVERS = []
-    _PDAL_VALIDATE = False
-
-DEFAULT_STAGE_PARAMS = defaultdict(dict)
-DEFAULT_STAGE_PARAMS.update({
-# TODO: add stage specific default configurations
-})
-
-
-class StageSpec(object):
-    def __init__(self, prefix, **kwargs):
-        self.prefix = prefix
-        self.key = ".".join([self.prefix, kwargs.get("type", "")])
-        self.spec = DEFAULT_STAGE_PARAMS[self.key].copy()
-        self.spec.update(kwargs)
-        self.spec["type"] = self.key
-        # NOTE: special case to support reading files without passing an explicit reader
-        if (self.prefix in ["readers", "writers"]) and kwargs.get("type") == "auto":
-            del self.spec["type"]
-
-    @property
-    def pipeline(self):
-        """
-        Promote this stage to  a `pdal.pio.PipelineSpec` with one `pdal.pio.StageSpec`
-        and return it.
-        """
-        output = PipelineSpec()
-        output.add_stage(self)
-        return output
-
-    def __getattr__(self, name):
-        if _PDAL_VALIDATE and (name not in dir(self)):
-            raise AttributeError(f"'{self.prefix}.{name}' is an invalid or unsupported PDAL stage")
-        return partial(self.__class__, self.prefix, type=name)
-
-    def __str__(self):
-        return json.dumps(self.spec, indent=4)
-
-    def __add__(self, other):
-        return self.pipeline + other
-
-    def __dir__(self):
-        extra_keys = [e["name"][len(self.key):] for e in PDAL_DRIVERS if e["name"].startswith(self.key)] + ["auto"]
-        return super().__dir__() + [e for e in extra_keys if len(e) > 0]
-
-    def execute(self):
-        return self.pipeline.execute()
-
-
-readers = StageSpec("readers")
-filters = StageSpec("filters")
-writers = StageSpec("writers")
-
-
-class PipelineSpec(object):
-    stages = []
-
-    def __init__(self, other=None):
-        if other is not None:
-            self.stages = copy.copy(other.stages)
-
-    @property
-    def spec(self):
-        """
-        Return a `dict` containing the pdal pipeline suitable for dumping to json
-        """
-        return {
-            "pipeline": [stage.spec for stage in self.stages]
-        }
-
-    def add_stage(self, stage):
-        """
-        Add a StageSpec to the end of this pipeline, and return the updated result.
-        """
-        assert isinstance(stage, StageSpec), "Expected StageSpec"
-
-        self.stages.append(stage)
-        return self
-
-    def __str__(self):
-        return json.dumps(self.spec, indent=4)
-
-    def __add__(self, stage_or_pipeline):
-        assert isinstance(stage_or_pipeline, (StageSpec, PipelineSpec)), "Expected StageSpec or PipelineSpec"
-
-        output = self.__class__(self)
-        if isinstance(stage_or_pipeline, StageSpec):
-            output.add_stage(stage_or_pipeline)
-        elif isinstance(stage_or_pipeline, PipelineSpec):
-            for stage in stage_or_pipeline.stages:
-                output.add_stage(stage)
-        return output
-
-    def execute(self):
-        """
-        Shortcut to execute and return the results of the pipeline.
-        """
-        # TODO: do some validation before calling execute
-
-        # TODO: some exception/error handling around pdal
-        pipeline = pdal.Pipeline(json.dumps(self.spec))
-        # pipeline.validate() # NOTE: disabling this because it causes segfaults in certain cases
-        pipeline.execute()
-
-        return pipeline.arrays[0] # NOTE: are there situation where arrays has multiple elements?
diff --git a/pdal/pipeline.py b/pdal/pipeline.py
deleted file mode 100644
index fca579e9..00000000
--- a/pdal/pipeline.py
+++ /dev/null
@@ -1,47 +0,0 @@
-
-from pdal import libpdalpython
-import numpy as np
-
-class Pipeline(object):
-    """A PDAL pipeline object, defined by JSON. See http://www.pdal.io/pipeline.html for more
-    information on how to define one"""
-
-    def __init__(self, json, arrays=None):
-
-        if arrays:
-            self.p = libpdalpython.PyPipeline(json, arrays)
-        else:
-            self.p = libpdalpython.PyPipeline(json)
-
-    def get_metadata(self):
-        return self.p.metadata
-    metadata = property(get_metadata)
-
-    def get_schema(self):
-        return self.p.schema
-    schema = property(get_schema)
-
-    def get_pipeline(self):
-        return self.p.pipeline
-    pipeline = property(get_pipeline)
-
-    def get_loglevel(self):
-        return self.p.loglevel
-
-    def set_loglevel(self, v):
-        self.p.loglevel = v
-    loglevel = property(get_loglevel, set_loglevel)
-
-    def get_log(self):
-        return self.p.log
-    log = property(get_log)
-
-    def execute(self):
-        return self.p.execute()
-
-    def validate(self):
-        return self.p.validate()
-
-    def get_arrays(self):
-        return self.p.arrays
-    arrays = property(get_arrays)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..e1b98235
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,69 @@
+[project]
+name = "pdal"
+description = "Point cloud data processing"
+readme = "README.rst"
+requires-python = ">=3.9"
+license = {file = "LICENSE.txt"}
+keywords = ["point", "cloud", "spatial"]
+authors = [
+  # One table per person: name and email must share a table to stay associated.
+  {name = "Howard Butler", email = "howard@hobu.co"}
+]
+maintainers = [
+  {name = "Howard Butler", email = "howard@hobu.co"}
+]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: BSD License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: GIS",
+]
+
+dependencies = [
+  "numpy >= 1.22"
+]
+
+dynamic = ["version"]
+
+[project.optional-dependencies]
+test = [
+  "pandas",
+  "meshio"
+]
+
+[tool.setuptools]
+package-dir = {"" =  "src"}
+zip-safe = false
+
+[project.urls]
+homepage = "https://pdal.io"
+documentation = "https://pdal.io"
+repository = "https://github.com/PDAL/Python"
+changelog = "https://github.com/PDAL/python/blob/main/README.rst"
+
+[build-system]
+requires = ["scikit-build-core >= 0.9", "numpy >= 1.22",  "pybind11[global]"]
+build-backend = "scikit_build_core.build"
+
+
+[tool.scikit-build]
+build-dir = "build/{wheel_tag}"
+sdist.exclude = [".github"]
+sdist.cmake = true
+cmake.build-type = "Release"
+sdist.include = [
+  "src",
+  "CMakeLists.txt"
+]
+build.verbose = false
+logging.level = "ERROR"
+
+[tool.scikit-build.metadata.version]
+provider = "scikit_build_core.metadata.regex"
+input = "src/pdal/__init__.py"
diff --git a/scripts/appveyor/build.cmd b/scripts/appveyor/build.cmd
deleted file mode 100644
index 8306aebb..00000000
--- a/scripts/appveyor/build.cmd
+++ /dev/null
@@ -1,7 +0,0 @@
-call "%CONDA_ROOT%\Scripts\activate.bat" base
-call conda install -c conda-forge -y pdal cython packaging
-
-call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
-
-python setup.py build
-
diff --git a/scripts/appveyor/config.cmd b/scripts/appveyor/config.cmd
deleted file mode 100644
index 3188a8bc..00000000
--- a/scripts/appveyor/config.cmd
+++ /dev/null
@@ -1,16 +0,0 @@
-@echo off
-
-set "CONDA_ROOT=C:\Miniconda3-x64"
-set PATH=%CONDA_ROOT%;%CONDA_ROOT%\\scripts;%CONDA_ROOT%\\Library\\bin;%PATH%;C:\\Program Files (x86)\\CMake\\bin
-conda config --set always_yes yes
-conda config --add channels conda-forge
-conda config --set auto_update_conda no
-conda config --set channel_priority true
-conda update -q --all
-conda info
-python -c "import sys; print(sys.version)"
-python -c "import sys; print(sys.executable)"
-python -c "import sys; print(sys.prefix)"
-
-dir
-
diff --git a/scripts/appveyor/test.cmd b/scripts/appveyor/test.cmd
deleted file mode 100644
index 61c3b221..00000000
--- a/scripts/appveyor/test.cmd
+++ /dev/null
@@ -1,3 +0,0 @@
-call "%CONDA_ROOT%\Scripts\activate.bat" base
-
-python setup.py test
diff --git a/scripts/azp/linux.yml b/scripts/azp/linux.yml
deleted file mode 100644
index d5a98b99..00000000
--- a/scripts/azp/linux.yml
+++ /dev/null
@@ -1,21 +0,0 @@
-# -*- mode: yaml -*-
-
-jobs:
-- job: linux
-  pool:
-    vmImage: ubuntu-16.04
-  container:
-      image: pdal/pdal:latest
-      options: --privileged
-  timeoutInMinutes: 60
-  steps:
-  - script: |
-      sudo apt-get update -y; sudo apt-get install python3-pip -y
-      pip3 install numpy packaging cython
-    displayName: 'Dependencies'
-  - script: |
-      python3 setup.py build
-    displayName: 'Build'
-  - script: |
-      python3 setup.py test
-    displayName: 'Test'
diff --git a/scripts/azp/osx.yml b/scripts/azp/osx.yml
deleted file mode 100644
index 1598d2ab..00000000
--- a/scripts/azp/osx.yml
+++ /dev/null
@@ -1,53 +0,0 @@
-# -*- mode: yaml -*-
-
-jobs:
-- job: osx
-  pool:
-    vmImage: macOS-10.13
-  timeoutInMinutes: 360
-  steps:
-  - script: |
-      echo "Removing homebrew from Azure to avoid conflicts."
-      curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/uninstall > ~/uninstall_homebrew
-      chmod +x ~/uninstall_homebrew
-      ~/uninstall_homebrew -fq
-      rm ~/uninstall_homebrew
-    displayName: Remove homebrew
-  - bash: |
-      echo "##vso[task.prependpath]$CONDA/bin"
-      sudo chown -R $USER $CONDA
-    displayName: Add conda to PATH
-
-  - script: |
-      ECHO ON
-      source activate base
-      conda create --yes --quiet --name pdal
-    displayName: Create conda environment
-
-  - script: |
-      ECHO ON
-      source activate pdal
-      conda config --set always_yes True --set show_channel_urls True
-      conda install --yes --quiet --name pdal -c conda-forge  cython packaging pdal numpy conda-forge-ci-setup=2 -y
-    displayName: Install PDAL
-  - script: |
-      source activate pdal
-      export CI=azure
-      source run_conda_forge_build_setup
-    env: {
-      OSX_FORCE_SDK_DOWNLOAD: "1"
-    }
-    displayName: Configure OSX SDK
-  - script: |
-      source activate pdal
-      pip install numpy packaging cython
-    displayName: 'Dependencies'
-  - script: |
-      source activate pdal
-      python setup.py build
-    displayName: 'Build'
-  - script: |
-      source activate pdal
-      python setup.py test
-    displayName: 'Test'
-
diff --git a/scripts/azp/win.yml b/scripts/azp/win.yml
deleted file mode 100644
index 4abdc798..00000000
--- a/scripts/azp/win.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-
-jobs:
-- job: win
-
-  pool:
-    vmImage: vs2017-win2016
-  timeoutInMinutes: 360
-  steps:
-    - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts"
-      displayName: Add conda to PATH
-
-    - script: |
-        ECHO ON
-        call conda create --yes --quiet --name pdal
-      displayName: Create conda environment
-
-    - script: |
-        ECHO ON
-        call activate pdal
-        call conda config --set always_yes True --set show_channel_urls True
-        call conda install --yes --quiet --name pdal -c conda-forge  pdal numpy packaging cython -y
-      displayName: Install PDAL
-    - script: |
-        ECHO ON
-        call activate pdal
-        call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x86_amd64
-        echo %PATH%
-        set CC=cl.exe
-        set CXX=cl.exe
-        python setup.py build
-      displayName: 'Build'
-    - script: |
-        ECHO ON
-        call activate pdal
-        call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x86_amd64
-        set PYTHONHOME=%CONDA_PREFIX%
-        python setup.py test
-      displayName: 'Test'
-
-
diff --git a/scripts/travis/script.sh b/scripts/travis/script.sh
deleted file mode 100755
index 84f837c6..00000000
--- a/scripts/travis/script.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-apt-get update -y; apt-get install python3-pip -y
-pip3 install numpy packaging cython
-cd /pdal/
-python3 setup.py build
-python3 setup.py test
diff --git a/setup.cfg b/setup.cfg
deleted file mode 100644
index 7f75bb2c..00000000
--- a/setup.cfg
+++ /dev/null
@@ -1,4 +0,0 @@
-[egg_info]
-tag_build =
-tag_date = 0
-tag_svn_revision = 0
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 5ab97be4..00000000
--- a/setup.py
+++ /dev/null
@@ -1,223 +0,0 @@
-#!/usr/bin/env python
-
-# Stolen from Shapely's setup.py
-# Two environment variables influence this script.
-#
-# PDAL_LIBRARY_PATH: a path to a PDAL C++ shared library.
-#
-# PDAL_CONFIG: the path to a pdal-config program that points to PDAL version,
-# headers, and libraries.
-#
-# NB: within this setup scripts, software versions are evaluated according
-# to https://www.python.org/dev/peps/pep-0440/.
-
-import logging
-import os
-import platform
-import sys
-import numpy
-from Cython.Build import cythonize
-
-USE_CYTHON = True
-try:
-    from Cython.Build import cythonize
-except ImportError:
-    USE_CYTHON = False
-
-ext = '.pyx' if USE_CYTHON else '.cpp'
-
-from setuptools import setup
-from packaging.version import Version
-
-
-logging.basicConfig()
-log = logging.getLogger(__file__)
-
-# python -W all setup.py ...
-if 'all' in sys.warnoptions:
-    log.level = logging.DEBUG
-
-
-# Second try: use PDAL_CONFIG environment variable
-if 'PDAL_CONFIG' in os.environ:
-    pdal_config = os.environ['PDAL_CONFIG']
-    log.debug('pdal_config: %s', pdal_config)
-else:
-    pdal_config = 'pdal-config'
-    # in case of windows...
-    if os.name in ['nt']:
-        pdal_config += '.bat'
-
-
-def get_pdal_config(option):
-    '''Get configuration option from the `pdal-config` development utility
-
-    This code was adapted from Shapely's geos-config stuff
-    '''
-    import subprocess
-    pdal_config = globals().get('pdal_config')
-    if not pdal_config or not isinstance(pdal_config, str):
-        raise OSError('Path to pdal-config is not set')
-    try:
-        stdout, stderr = subprocess.Popen(
-            [pdal_config, option],
-            stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
-    except OSError as ex:
-        # e.g., [Errno 2] No such file or directory
-        raise OSError(
-            'Could not find pdal-config %r: %s' % (pdal_config, ex))
-    if stderr and not stdout:
-        raise ValueError(stderr.strip())
-    if sys.version_info[0] >= 3:
-        result = stdout.decode('ascii').strip()
-    else:
-        result = stdout.strip()
-    log.debug('%s %s: %r', pdal_config, option, result)
-    return result
-
-# Get the version from the pdal module
-module_version = None
-with open('pdal/__init__.py', 'r') as fp:
-    for line in fp:
-        if line.startswith("__version__"):
-            module_version = Version(line.split("=")[1].strip().strip("\"'"))
-            break
-
-if not module_version:
-    raise ValueError("Could not determine PDAL's version")
-
-# Handle UTF-8 encoding of certain text files.
-open_kwds = {}
-if sys.version_info >= (3,):
-    open_kwds['encoding'] = 'utf-8'
-
-with open('README.rst', 'r', **open_kwds) as fp:
-    readme = fp.read()
-
-with open('CHANGES.txt', 'r', **open_kwds) as fp:
-    changes = fp.read()
-
-long_description = readme + '\n\n' +  changes
-
-include_dirs = []
-library_dirs = []
-libraries = []
-extra_link_args = []
-extra_compile_args = []
-
-from setuptools.extension import Extension as DistutilsExtension
-
-PDALVERSION = None
-if pdal_config and "clean" not in sys.argv:
-    # Collect other options from PDAL
-    try:
-
-        # Running against different major versions is going to fail.
-        # Minor versions might too, depending on numpy.
-        for item in get_pdal_config('--python-version').split():
-            if item:
-                # 2.7.4 or 3.5.2
-                built_version = item.split('.')
-                built_major = int(built_version[0])
-                running_major = int(sys.version_info[0])
-                if built_major != running_major:
-                    message = "Version mismatch. PDAL Python support was compiled against version %d.x but setup is running version is %d.x. "
-                    raise Exception(message % (built_major, running_major))
-
-    # older versions of pdal-config do not include --python-version switch
-    except ValueError:
-        pass
-    PDALVERSION = Version(get_pdal_config('--version'))
-
-    separator = ':'
-    if os.name in ['nt']:
-        separator = ';'
-
-    for item in get_pdal_config('--includes').split():
-        if item.startswith("-I"):
-            include_dirs.extend(item[2:].split(separator))
-
-    for item in get_pdal_config('--libs').split():
-        if item.startswith("-L"):
-            library_dirs.extend(item[2:].split(separator))
-        elif item.startswith("-l"):
-            libraries.append(item[2:])
-
-include_dirs.append(numpy.get_include())
-
-if platform.system() == 'Darwin':
-    extra_link_args.append('-Wl,-rpath,'+library_dirs[0])
-
-DEBUG=True
-if DEBUG:
-    if os.name != 'nt':
-        extra_compile_args += ['-g','-O0']
-
-if PDALVERSION is not None and PDALVERSION < Version('2.0.0'):
-    raise Exception("PDAL version '%s' is not compatible with PDAL Python library version '%s'"%(PDALVERSION, module_version))
-
-
-if os.name in ['nt']:
-    if os.environ.get('OSGEO4W_ROOT'):
-        library_dirs = ['c:/%s/lib' % os.environ.get('OSGEO4W_ROOT')]
-    if os.environ.get('CONDA_PREFIX'):
-        prefix=os.path.expandvars('%CONDA_PREFIX%')
-        library_dirs = ['%s\Library\lib' % prefix]
-
-    libraries = ['pdalcpp','pdal_util','ws2_32']
-
-    extra_compile_args = ['/DNOMINMAX',]
-
-if 'linux' in sys.platform or 'linux2' in sys.platform or 'darwin' in sys.platform:
-    extra_compile_args += ['-std=c++11', '-Wno-unknown-pragmas']
-    if 'GCC' in sys.version:
-        # try to ensure the ABI for Conda GCC 4.8
-        if '4.8' in sys.version:
-            extra_compile_args += ['-D_GLIBCXX_USE_CXX11_ABI=0']
-
-
-
-sources=['pdal/libpdalpython'+ext, "pdal/PyPipeline.cpp", "pdal/PyArray.cpp" ]
-extensions = [DistutilsExtension("*",
-                                   sources,
-                                   include_dirs=include_dirs,
-                                   library_dirs=library_dirs,
-                                   extra_compile_args=extra_compile_args,
-                                   libraries=libraries,
-                                   extra_link_args=extra_link_args,)]
-if USE_CYTHON and "clean" not in sys.argv:
-    from Cython.Build import cythonize
-    extensions= cythonize(extensions, compiler_directives={'language_level':3})
-
-setup_args = dict(
-    name                = 'PDAL',
-    version             = str(module_version),
-    requires            = ['Python (>=3.0)', 'Numpy'],
-    description         = 'Point cloud data processing',
-    license             = 'BSD',
-    keywords            = 'point cloud spatial',
-    author              = 'Howard Butler',
-    author_email        = 'howard@hobu.co',
-    maintainer          = 'Howard Butler',
-    maintainer_email    = 'howard@hobu.co',
-    url                 = 'http://pdal.io',
-    long_description    = long_description,
-    test_suite          = 'test',
-    packages            = [
-        'pdal',
-    ],
-    classifiers         = [
-        'Development Status :: 5 - Production/Stable',
-        'Intended Audience :: Developers',
-        'Intended Audience :: Science/Research',
-        'License :: OSI Approved :: BSD License',
-        'Operating System :: OS Independent',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3',
-        'Topic :: Scientific/Engineering :: GIS',
-    ],
-    cmdclass           = {},
-    install_requires   = ['numpy', 'packaging', 'cython'],
-)
-setup(ext_modules=extensions, **setup_args)
-
diff --git a/setup.py.off b/setup.py.off
new file mode 100644
index 00000000..a3fd2a22
--- /dev/null
+++ b/setup.py.off
@@ -0,0 +1,45 @@
+from skbuild import setup
+
+# Read the package version by scanning pdal/__init__.py for its __version__ line
+with open("pdal/__init__.py", "r") as fp:  # NOTE(review): this patch moves sources under src/pdal — confirm this path
+    for line in fp:
+        if line.startswith("__version__"):
+            version = line.split("=")[1].strip().strip("\"'")  # text after the first "=", minus spaces and quotes
+            break
+    else:  # for/else: runs only when no __version__ line triggered the break
+        raise ValueError("Could not determine Python package version")
+
+with open("README.rst", "r", encoding="utf-8") as fp:
+    readme = fp.read()
+
+with open("CHANGES.txt", "r", encoding="utf-8") as fp:
+    changes = fp.read()
+
+setup(
+    name="pdal",
+    version=version,
+    description="Point cloud data processing",
+    license="BSD",
+    keywords="point cloud spatial",
+    author="Howard Butler",
+    author_email="howard@hobu.co",
+    maintainer="Howard Butler",
+    maintainer_email="howard@hobu.co",
+    url="https://pdal.io",
+    long_description=readme + "\n\n" + changes,  # README followed by the changelog
+    long_description_content_type="text/x-rst",
+    packages=["pdal"],
+    install_requires=["numpy"],
+    classifiers=[
+        "Development Status :: 5 - Production/Stable",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: BSD License",
+        "Operating System :: OS Independent",
+        "Programming Language :: Python :: 3.7",  # NOTE(review): CI in this patch tests 3.10-3.13 — confirm classifiers
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Topic :: Scientific/Engineering :: GIS",
+    ],
+)
diff --git a/pdal/PyArray.cpp b/src/pdal/PyArray.cpp
similarity index 52%
rename from pdal/PyArray.cpp
rename to src/pdal/PyArray.cpp
index 73e6410d..62b4875a 100644
--- a/pdal/PyArray.cpp
+++ b/src/pdal/PyArray.cpp
@@ -35,8 +35,6 @@
 #include "PyArray.hpp"
 #include <pdal/io/MemoryViewReader.hpp>
 
-#include <numpy/arrayobject.h>
-
 namespace pdal
 {
 namespace python
@@ -79,7 +77,7 @@ Dimension::Type pdalType(int t)
     return Type::None;
 }
 
-std::string toString(PyObject *pname)
+std::string pyObjectToString(PyObject *pname)
 {
     PyObject* r = PyObject_Str(pname);
     if (!r)
@@ -90,25 +88,25 @@ std::string toString(PyObject *pname)
 
 } // unnamed namespace
 
-Array::Array() : m_array(nullptr)
-{
-    if (_import_array() < 0)
-        throw pdal_error("Could not import numpy.core.multiarray.");
-}
 
-Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true)
-{
-    if (_import_array() < 0)
-        throw pdal_error("Could not import numpy.core.multiarray.");
+#if NPY_ABI_VERSION < 0x02000000
+  #define PyDataType_FIELDS(descr) ((descr)->fields)
+  #define PyDataType_NAMES(descr) ((descr)->names)
+#endif
 
+Array::Array(PyArrayObject* array, std::shared_ptr<ArrayStreamHandler> stream_handler)
+    : m_array(array), m_rowMajor(true), m_stream_handler(std::move(stream_handler))
+{
     Py_XINCREF(array);
 
     PyArray_Descr *dtype = PyArray_DTYPE(m_array);
     npy_intp ndims = PyArray_NDIM(m_array);
     npy_intp *shape = PyArray_SHAPE(m_array);
-    int numFields = (dtype->fields == Py_None) ?
+
+    PyObject* fields = PyDataType_FIELDS(dtype);
+    int numFields = (fields  == Py_None) ?
         0 :
-        static_cast<int>(PyDict_Size(dtype->fields));
+        static_cast<int>(PyDict_Size(fields));
 
     int xyz = 0;
     if (numFields == 0)
@@ -119,7 +117,7 @@ Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true)
     }
     else
     {
-        PyObject *names_dict = dtype->fields;
+        PyObject *names_dict = fields;
         PyObject *names = PyDict_Keys(names_dict);
         PyObject *values = PyDict_Values(names_dict);
         if (!names || !values)
@@ -127,7 +125,7 @@ Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true)
 
         for (int i = 0; i < numFields; ++i)
         {
-            std::string name = toString(PyList_GetItem(names, i));
+            std::string name = python::pyObjectToString(PyList_GetItem(names, i));
             if (name == "X")
                 xyz |= 1;
             else if (name == "Y")
@@ -163,153 +161,80 @@ Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true)
 
 Array::~Array()
 {
-    if (m_array)
-        Py_XDECREF((PyObject *)m_array);
-}
-
-
-void Array::update(PointViewPtr view)
-{
-    if (m_array)
-        Py_XDECREF((PyObject *)m_array);
-    m_array = nullptr;  // Just in case of an exception.
-
-    Dimension::IdList dims = view->dims();
-    npy_intp size = view->size();
-
-    PyObject *dtype_dict = (PyObject*)buildNumpyDescription(view);
-    if (!dtype_dict)
-        throw pdal_error("Unable to build numpy dtype "
-                "description dictionary");
-
-    PyArray_Descr *dtype = nullptr;
-    if (PyArray_DescrConverter(dtype_dict, &dtype) == NPY_FAIL)
-        throw pdal_error("Unable to build numpy dtype");
-    Py_XDECREF(dtype_dict);
-
-    // This is a 1 x size array.
-    m_array = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype,
-            1, &size, 0, nullptr, NPY_ARRAY_CARRAY, nullptr);
-
-    // copy the data
-    DimTypeList types = view->dimTypes();
-    for (PointId idx = 0; idx < view->size(); idx++)
-    {
-        char *p = (char *)PyArray_GETPTR1(m_array, idx);
-        view->getPackedPoint(types, idx, p);
-    }
-}
-
-
-//ABELL - Who's responsible for incrementing the ref count?
-PyArrayObject *Array::getPythonArray() const
-{
-    return m_array;
-}
-
-PyObject* Array::buildNumpyDescription(PointViewPtr view) const
-{
-    // Build up a numpy dtype dictionary
-    //
-    // {'formats': ['f8', 'f8', 'f8', 'u2', 'u1', 'u1', 'u1', 'u1', 'u1',
-    //              'f4', 'u1', 'u2', 'f8', 'u2', 'u2', 'u2'],
-    // 'names': ['X', 'Y', 'Z', 'Intensity', 'ReturnNumber',
-    //           'NumberOfReturns', 'ScanDirectionFlag', 'EdgeOfFlightLine',
-    //           'Classification', 'ScanAngleRank', 'UserData',
-    //           'PointSourceId', 'GpsTime', 'Red', 'Green', 'Blue']}
-    //
-
-    Dimension::IdList dims = view->dims();
-
-    PyObject* dict = PyDict_New();
-    PyObject* sizes = PyList_New(dims.size());
-    PyObject* formats = PyList_New(dims.size());
-    PyObject* titles = PyList_New(dims.size());
-
-    for (size_t i = 0; i < dims.size(); ++i)
-    {
-        Dimension::Id id = dims[i];
-        Dimension::Type t = view->dimType(id);
-        npy_intp stride = view->dimSize(id);
-
-        std::string name = view->dimName(id);
-
-        std::string kind("i");
-        Dimension::BaseType b = Dimension::base(t);
-        if (b == Dimension::BaseType::Unsigned)
-            kind = "u";
-        else if (b == Dimension::BaseType::Signed)
-            kind = "i";
-        else if (b == Dimension::BaseType::Floating)
-            kind = "f";
-        else
-            throw pdal_error("Unable to map kind '" + kind  +
-                "' to PDAL dimension type");
-
-        std::stringstream oss;
-        oss << kind << stride;
-        PyObject* pySize = PyLong_FromLong(stride);
-        PyObject* pyTitle = PyUnicode_FromString(name.c_str());
-        PyObject* pyFormat = PyUnicode_FromString(oss.str().c_str());
-
-        PyList_SetItem(sizes, i, pySize);
-        PyList_SetItem(titles, i, pyTitle);
-        PyList_SetItem(formats, i, pyFormat);
-    }
-
-    PyDict_SetItemString(dict, "names", titles);
-    PyDict_SetItemString(dict, "formats", formats);
-
-    return dict;
-}
-
-bool Array::rowMajor() const
-{
-    return m_rowMajor;
+    Py_XDECREF(m_array);
 }
 
-Array::Shape Array::shape() const
+std::shared_ptr<ArrayIter> Array::iterator()
 {
-    return m_shape;
+    return std::make_shared<ArrayIter>(m_array, m_stream_handler);  // new iterator on every call; the handler pointer is shared
 }
 
-const Array::Fields& Array::fields() const
+ArrayIter::ArrayIter(PyArrayObject* np_array, std::shared_ptr<ArrayStreamHandler> stream_handler)
+    : m_stream_handler(std::move(stream_handler))
 {
-    return m_fields;
-}
+    // Create a read-only numpy iterator; NPY_ITER_EXTERNAL_LOOP leaves the inner loop to operator++
+    m_iter = NpyIter_New(np_array,
+                         NPY_ITER_EXTERNAL_LOOP | NPY_ITER_READONLY | NPY_ITER_REFS_OK,
+                         NPY_KEEPORDER, NPY_NO_CASTING, NULL);
+    if (!m_iter)
+        throw pdal_error("Unable to create numpy iterator.");
 
-ArrayIter& Array::iterator()
-{
-    ArrayIter *it = new ArrayIter(*this);
-    m_iterators.push_back(std::unique_ptr<ArrayIter>(it));
-    return *it;
+    initIterator();  // may already set m_done when a stream handler reports 0 items
 }
 
-ArrayIter::ArrayIter(Array& array)
+void ArrayIter::initIterator()
 {
-    m_iter = NpyIter_New(array.getPythonArray(),
-        NPY_ITER_EXTERNAL_LOOP | NPY_ITER_READONLY | NPY_ITER_REFS_OK,
-        NPY_KEEPORDER, NPY_NO_CASTING, NULL);
-    if (!m_iter)
-        throw pdal_error("Unable to create numpy iterator.");
+    // With a stream handler, run it first: it populates the buffer and returns how many items to iterate (0 = no more data)
+    int64_t stream_chunk_size = 0;
+    if (m_stream_handler) {
+        stream_chunk_size = (*m_stream_handler)();
+        if (!stream_chunk_size) {
+            m_done = true;
+            return;
+        }
+    }
 
+    // Fetch the iteration function, then cache the data pointer array, inner stride and inner loop size
     char *itererr;
     m_iterNext = NpyIter_GetIterNext(m_iter, &itererr);
     if (!m_iterNext)
     {
         NpyIter_Deallocate(m_iter);
-        throw pdal_error(std::string("Unable to create numpy iterator: ") +
-            itererr);
+        m_iter = nullptr;
+        throw pdal_error(std::string("Unable to retrieve iteration function from numpy iterator: ") + itererr);
     }
     m_data = NpyIter_GetDataPtrArray(m_iter);
-    m_stride = NpyIter_GetInnerStrideArray(m_iter);
-    m_size = NpyIter_GetInnerLoopSizePtr(m_iter);
+    m_stride = *NpyIter_GetInnerStrideArray(m_iter);
+    m_size = *NpyIter_GetInnerLoopSizePtr(m_iter);
+    if (stream_chunk_size) {
+        if (stream_chunk_size < 0 || stream_chunk_size > m_size) {
+            // Release the iterator before throwing: when called from the constructor, ~ArrayIter never runs
+            NpyIter_Deallocate(m_iter);
+            m_iter = nullptr;
+            throw pdal_error("Stream chunk size not in the range of array length: " + std::to_string(stream_chunk_size));
+        }
+        m_size = stream_chunk_size;  // iterate only the items the handler reported
+    }
     m_done = false;
 }
 
+void ArrayIter::resetIterator()
+{
+    // Rewind the numpy iterator to its initial state (NULL: no error message requested)
+    if (NpyIter_Reset(m_iter, NULL) != NPY_SUCCEED) {
+        NpyIter_Deallocate(m_iter);
+        m_iter = nullptr;  // keeps ~ArrayIter from deallocating a second time
+        throw pdal_error("Unable to reset numpy iterator.");
+    }
+
+    initIterator();  // re-runs the stream handler (if any) and reloads data pointer, stride and size
+}
+
 ArrayIter::~ArrayIter()
 {
-    NpyIter_Deallocate(m_iter);
+    if (m_iter != nullptr) {  // the error paths deallocate and null m_iter themselves
+        NpyIter_Deallocate(m_iter);
+    }
 }
 
 ArrayIter& ArrayIter::operator++()
@@ -317,23 +242,18 @@ ArrayIter& ArrayIter::operator++()
     if (m_done)
         return *this;
 
-    if (--(*m_size))
-        *m_data += *m_stride;
-    else if (!m_iterNext(m_iter))
-        m_done = true;
+    if (--m_size) {
+        *m_data += m_stride;  // still inside the current inner loop: step to the next element
+    } else if (m_iterNext(m_iter)) {
+        m_size = *NpyIter_GetInnerLoopSizePtr(m_iter);  // new inner loop: reload the cached element count
+    } else if (m_stream_handler) {
+        resetIterator();  // array exhausted: ask the stream handler for the next chunk
+    } else {
+        m_done = true;
+    }
     return *this;
 }
 
-ArrayIter::operator bool () const
-{
-    return !m_done;
-}
-
-char * ArrayIter::operator * () const
-{
-    return *m_data;
-}
-
 } // namespace python
 } // namespace pdal
 
diff --git a/pdal/PyArray.hpp b/src/pdal/PyArray.hpp
similarity index 66%
rename from pdal/PyArray.hpp
rename to src/pdal/PyArray.hpp
index d77b3d10..b2aca844 100644
--- a/pdal/PyArray.hpp
+++ b/src/pdal/PyArray.hpp
@@ -34,69 +34,86 @@
 
 #pragma once
 
-#include <numpy/ndarraytypes.h>
-
+#include "export.hpp"
 #include <pdal/PointView.hpp>
+
+#define NPY_TARGET_VERSION NPY_1_22_API_VERSION
+#define NPY_NO_DEPRECATED_API NPY_1_22_API_VERSION
+
+#define NO_IMPORT_ARRAY
+#define PY_ARRAY_UNIQUE_SYMBOL PDAL_ARRAY_API
+
 #include <pdal/io/MemoryViewReader.hpp>
 
+#include <numpy/ndarraytypes.h>
+#include <numpy/arrayobject.h>
+
+#include <vector>
+#include <memory>
+
 namespace pdal
 {
 namespace python
 {
 
+
 class ArrayIter;
 
-class PDAL_DLL Array
+using ArrayStreamHandler = std::function<int64_t()>;
+
+class PDAL_EXPORT Array
 {
 public:
     using Shape = std::array<size_t, 3>;
     using Fields = std::vector<MemoryViewReader::Field>;
 
-    // Create an array for reading data from PDAL.
-    Array();
-
-    // Create an array for writing data to PDAL.
-    Array(PyArrayObject* array);
-
+    Array(PyArrayObject* array, std::shared_ptr<ArrayStreamHandler> stream_handler = {});  // takes its own reference to `array`
     ~Array();
-    void update(PointViewPtr view);
-    PyArrayObject *getPythonArray() const;
-    bool rowMajor() const;
-    Shape shape() const;
-    const Fields& fields() const;
-    ArrayIter& iterator();
 
-private:
-    inline PyObject* buildNumpyDescription(PointViewPtr view) const;
+    Array(Array&& a) = default;  // NOTE(review): copies m_array without clearing the source, and ~Array() Py_XDECREFs it — confirm Arrays are never moved
+    Array& operator=(Array&& a) = default;  // NOTE(review): same concern; the previous m_array is also not released here
 
+    Array(const Array&) = delete;
+    Array() = delete;
 
+    bool rowMajor() const { return m_rowMajor; };
+    Shape shape() const { return m_shape; }
+    const Fields& fields() const { return m_fields; };
+    std::shared_ptr<ArrayIter> iterator();  // creates a new iterator over m_array on every call
+
+private:
     PyArrayObject* m_array;
-    Array& operator=(Array const& rhs);
     Fields m_fields;
     bool m_rowMajor;
     Shape m_shape {};
-    std::vector<std::unique_ptr<ArrayIter>> m_iterators;
+    std::shared_ptr<ArrayStreamHandler> m_stream_handler;  // may be empty (constructor default); handed to each iterator
 };
 
-class ArrayIter
+
+class PDAL_EXPORT ArrayIter
 {
 public:
     ArrayIter(const ArrayIter&) = delete;
+    ArrayIter() = delete;
 
-    ArrayIter(Array& array);
+    ArrayIter(PyArrayObject*, std::shared_ptr<ArrayStreamHandler>);  // throws pdal_error if the numpy iterator cannot be set up
     ~ArrayIter();
 
     ArrayIter& operator++();
-    operator bool () const;
-    char *operator * () const;
+    operator bool () const { return !m_done; }  // false once iteration has finished
+    char* operator*() const { return *m_data; }  // current data pointer of the numpy iterator
 
 private:
-    NpyIter *m_iter;
+    NpyIter *m_iter = nullptr;  // nulled again whenever the iterator is deallocated early
     NpyIter_IterNextFunc *m_iterNext;
     char **m_data;
-    npy_intp *m_size;
-    npy_intp *m_stride;
+    npy_intp m_size;  // elements left in the current inner loop; decremented by operator++
+    npy_intp m_stride;  // byte step added to the data pointer per element
     bool m_done;
+
+    std::shared_ptr<ArrayStreamHandler> m_stream_handler;  // optional; invoked by initIterator()
+    void initIterator();  // runs the stream handler (if any) and caches iterator state
+    void resetIterator();  // NpyIter_Reset followed by initIterator()
 };
 
 } // namespace python
diff --git a/pdal/PyDimension.hpp b/src/pdal/PyDimension.hpp
similarity index 99%
rename from pdal/PyDimension.hpp
rename to src/pdal/PyDimension.hpp
index f49645af..faaac509 100644
--- a/pdal/PyDimension.hpp
+++ b/src/pdal/PyDimension.hpp
@@ -45,7 +45,7 @@ typedef struct Dimension
     std::string name;
     std::string description;
     std::string type;
-    int size;
+    size_t size;
     std::string units;
 } Dimension;
 
diff --git a/src/pdal/PyPipeline.cpp b/src/pdal/PyPipeline.cpp
new file mode 100644
index 00000000..7f295273
--- /dev/null
+++ b/src/pdal/PyPipeline.cpp
@@ -0,0 +1,406 @@
+/******************************************************************************
+* Copyright (c) 2016, Howard Butler (howard@hobu.co)
+*
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following
+* conditions are met:
+*
+*     * Redistributions of source code must retain the above copyright
+*       notice, this list of conditions and the following disclaimer.
+*     * Redistributions in binary form must reproduce the above copyright
+*       notice, this list of conditions and the following disclaimer in
+*       the documentation and/or other materials provided
+*       with the distribution.
+*     * Neither the name of Hobu, Inc. or Flaxen Geo Consulting nor the
+*       names of its contributors may be used to endorse or promote
+*       products derived from this software without specific prior
+*       written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+* OF SUCH DAMAGE.
+****************************************************************************/
+
+#include "PyArray.hpp"
+#include "PyPipeline.hpp"
+#include <pdal/util/Utils.hpp>
+
+#ifndef _WIN32
+#include <dlfcn.h>
+#endif
+
+namespace pdal
+{
+namespace python
+{
+
+// Adds the number of non-skipped points currently in the table to m_count, then delegates to FixedPointTable::reset().
+void CountPointTable::reset()
+{
+    for (PointId idx = 0; idx < numPoints(); idx++)
+        if (!skip(idx))
+            m_count++;
+    FixedPointTable::reset();
+}
+
+// Validates the log level, routes the manager's log into m_logStream, parses the JSON pipeline and attaches the input arrays.
+PipelineExecutor::PipelineExecutor(
+    std::string const& json, std::vector<std::shared_ptr<Array>> arrays, int level)
+{
+    if (level < 0 || level > 8)
+        throw pdal_error("log level must be between 0 and 8!");
+
+    LogPtr log(Log::makeLog("pypipeline", &m_logStream));  // the log writes into this object's stringstream
+    log->setLevel(static_cast<pdal::LogLevel>(level));
+    m_manager.setLog(log);
+
+    std::stringstream strm;
+    strm << json;
+    m_manager.readPipeline(strm);
+
+    addArrayReaders(arrays);  // no-op when `arrays` is empty
+}
+
+// Runs m_manager.execute(), first restricting the layout to allowedDims when that list is non-empty; returns the manager's count.
+point_count_t PipelineExecutor::execute(pdal::StringList allowedDims)
+{
+    if (allowedDims.size())
+    {
+        m_manager.pointTable().layout()->setAllowedDims(allowedDims);
+    }
+
+    point_count_t count = m_manager.execute();
+    m_executed = true;  // unlocks views(), getMetadata() and getSchema()
+    return count;
+}
+
+std::string PipelineExecutor::getSrsWKT2() const
+{
+    std::string output("");
+    pdal::PointTableRef pointTable = m_manager.pointTable();
+
+    // Serialize the spatial reference stored on the manager's point table.
+    pdal::SpatialReference srs = pointTable.spatialReference();
+    output = srs.getWKT();  // NOTE(review): named WKT2 but calls getWKT() — confirm which WKT flavour this yields
+
+    return output;
+}
+
+point_count_t PipelineExecutor::executeStream(point_count_t streamLimit,
+                                              pdal::StringList allowedDims)
+{
+    // Stream through a local counting table of capacity streamLimit. The dimension filter has to be set
+    // on that table: the manager's own point table is not the one handed to executeStream().
+    CountPointTable table(streamLimit);
+    if (allowedDims.size())
+        table.layout()->setAllowedDims(allowedDims);
+    m_manager.executeStream(table);
+    m_executed = true;
+    return table.count();  // non-skipped points tallied by CountPointTable::reset()
+}
+
+const PointViewSet& PipelineExecutor::views() const
+{
+    if (!m_executed)  // only execute() / executeStream() set this flag
+        throw pdal_error("Pipeline has not been executed!");
+
+    return m_manager.views();  // reference into the manager; no copy is made
+}
+
+// Writes the pipeline, starting from m_manager.getStage(), with PipelineWriter and returns the resulting text.
+std::string PipelineExecutor::getPipeline() const
+{
+    std::stringstream strm;
+    pdal::PipelineWriter::writePipeline(m_manager.getStage(), strm);
+    return strm.str();
+}
+
+// Returns the manager's metadata, cloned under the name "metadata", as JSON; throws pdal_error before execution.
+std::string PipelineExecutor::getMetadata() const
+{
+    if (!m_executed)
+        throw pdal_error("Pipeline has not been executed!");
+
+    std::stringstream strm;
+    MetadataNode root = m_manager.getMetadata().clone("metadata");
+    pdal::Utils::toJSON(root, strm);
+    return strm.str();
+}
+
+// Returns the layout of pointTable(), cloned under the name "schema", as JSON; throws pdal_error before execution.
+std::string PipelineExecutor::getSchema() const
+{
+    if (!m_executed)
+        throw pdal_error("Pipeline has not been executed!");
+
+    std::stringstream strm;
+    MetadataNode root = pointTable().layout()->toMetadata().clone("schema");  // pointTable() is virtual
+    pdal::Utils::toJSON(root, strm);
+    return strm.str();
+}
+
+// Builds the preview summary node for one stage from its QuickInfo; throws pdal_error when no preview is available.
+MetadataNode computePreview(Stage* stage)
+{
+    if (!stage)
+        throw pdal_error("no valid stage in QuickInfo");
+
+    QuickInfo qi = stage->preview();
+    if (!qi.valid())
+        throw pdal_error("No summary data available for stage '" + stage->getName()+"'" );
+
+    // The summary node is named after the stage; SRS, bounds and metadata are added only when valid.
+    MetadataNode summary(stage->getName());
+    summary.add("num_points", qi.m_pointCount);
+    if (qi.m_srs.valid())
+    {
+        MetadataNode srs = qi.m_srs.toMetadata();
+        summary.add(srs);
+    }
+    if (qi.m_bounds.valid())
+    {
+        MetadataNode bounds = Utils::toMetadata(qi.m_bounds);
+        summary.add(bounds.clone("bounds"));
+    }
+
+    // Join the dimension names into one ", "-separated string.
+    std::string dims;
+    auto di = qi.m_dimNames.begin();
+    while (di != qi.m_dimNames.end())
+    {
+        dims += *di;
+        ++di;
+        if (di != qi.m_dimNames.end())
+           dims += ", ";
+    }
+    if (dims.size())
+        summary.add("dimensions", dims);
+
+    if (!qi.m_metadata.empty() && qi.m_metadata.valid())
+    {
+        summary.add(qi.m_metadata.clone("metadata"));
+    }
+
+    // The node is returned as-is; it used to be serialized here into a stream nobody read.
+    return summary;
+}
+
+// Returns a JSON document holding one preview summary per reader stage of the pipeline.
+std::string PipelineExecutor::getQuickInfo() const
+{
+    // Keep the stages whose first '.'-separated name component is "readers" (case-insensitive).
+    std::vector<Stage *> stages = m_manager.stages();
+    std::vector<Stage *> previewStages;
+
+    for (auto const& s: stages)
+    {
+        auto n = s->getName();
+        auto v = pdal::Utils::split2(n,'.');
+        if (v.size() > 0)
+            if (pdal::Utils::iequals(v[0], "readers"))
+                previewStages.push_back(s);
+    }
+
+    // computePreview() throws when a reader offers no QuickInfo, which aborts the whole summary.
+    MetadataNode summary;
+    for (auto const& stage: previewStages)
+    {
+        MetadataNode n = computePreview(stage);
+        summary.add(n);
+    }
+
+    std::stringstream strm;
+    pdal::Utils::toJSON(summary, strm);
+    return strm.str();
+}
+
+void PipelineExecutor::addArrayReaders(std::vector<std::shared_ptr<Array>> arrays)
+{
+    // Feeds every input array into the pipeline's single root stage through its own readers.memoryview stage.
+    if (arrays.empty())
+        return;
+
+    std::vector<Stage *> roots = m_manager.roots();
+    if (roots.size() != 1)
+        throw pdal_error("Filter pipeline must contain a single root stage.");
+
+    for (const auto& array : arrays)
+    {
+        // Create numpy reader for each array
+        // Options
+
+        Options options;
+        options.add("order", array->rowMajor() ?
+            MemoryViewReader::Order::RowMajor :
+            MemoryViewReader::Order::ColumnMajor);
+        options.add("shape", MemoryViewReader::Shape(array->shape()));
+
+        Stage& s = m_manager.makeReader("", "readers.memoryview", options);
+        MemoryViewReader& r = dynamic_cast<MemoryViewReader &>(s);
+        for (const auto& f : array->fields())
+            r.pushField(f);
+
+        auto arrayIter = array->iterator();  // shared_ptr captured by value keeps the iterator alive inside the lambda
+        auto incrementer = [arrayIter, firstPoint = true](PointId id) mutable -> char *
+        {
+            ArrayIter& iter = *arrayIter;
+            if (!firstPoint && iter) {
+                ++iter;
+            } else {
+                firstPoint = false;  // the first call returns the initial element without advancing
+            }
+
+            if (!iter)
+                return nullptr;  // iteration finished
+
+            char *c = *iter;
+            return c;
+        };
+
+        r.setIncrementer(incrementer);
+        roots[0]->setInput(r);
+    }
+
+    m_manager.validateStageOptions();
+}
+
+// Returns a new reference to a numpy dtype dict ('names' + 'formats') describing `layout`; throws pdal_error on failure.
+PyObject* buildNumpyDescriptor(PointLayoutPtr layout)
+{
+    // Build up a numpy dtype dictionary
+    //
+    // {'formats': ['f8', 'f8', 'f8', 'u2', 'u1', 'u1', 'u1', 'u1', 'u1',
+    //              'f4', 'u1', 'u2', 'f8', 'u2', 'u2', 'u2'],
+    // 'names': ['X', 'Y', 'Z', 'Intensity', 'ReturnNumber',
+    //           'NumberOfReturns', 'ScanDirectionFlag', 'EdgeOfFlightLine',
+    //           'Classification', 'ScanAngleRank', 'UserData',
+    //           'PointSourceId', 'GpsTime', 'Red', 'Green', 'Blue']}
+    //
+    // Entries are emitted in order of increasing layout offset.
+    auto sortByOffset = [layout](Dimension::Id id1, Dimension::Id id2) -> bool
+    {
+        return layout->dimOffset(id1) < layout->dimOffset(id2);
+    };
+    auto dims = layout->dims();
+    std::sort(dims.begin(), dims.end(), sortByOffset);
+
+    PyObject* names = PyList_New(dims.size());
+    PyObject* formats = PyList_New(dims.size());
+    PyObject* dtype_dict = PyDict_New();
+    // Drops every reference created above; used on the failure paths so nothing leaks when we throw.
+    auto releaseAll = [&]() { Py_XDECREF(names); Py_XDECREF(formats); Py_XDECREF(dtype_dict); };
+    if (!names || !formats || !dtype_dict)
+    {
+        releaseAll();
+        throw pdal_error("Unable to allocate numpy dtype description");
+    }
+    for (size_t i = 0; i < dims.size(); ++i)
+    {
+        Dimension::Id id = dims[i];
+        auto name = layout->dimName(id);
+        PyList_SetItem(names, i, PyUnicode_FromString(name.c_str()));
+
+        std::stringstream format;
+        switch (Dimension::base(layout->dimType(id)))
+        {
+            case Dimension::BaseType::Unsigned: format << 'u'; break;
+            case Dimension::BaseType::Signed:   format << 'i'; break;
+            case Dimension::BaseType::Floating: format << 'f'; break;
+            default:
+                releaseAll();
+                throw pdal_error("Unable to map dimension '" + name  + "' to Numpy");
+        }
+        format << layout->dimSize(id);
+        PyList_SetItem(formats, i, PyUnicode_FromString(format.str().c_str()));
+    }
+    PyDict_SetItemString(dtype_dict, "names", names);
+    PyDict_SetItemString(dtype_dict, "formats", formats);
+    Py_DECREF(names);    // PyDict_SetItemString does not steal references: the dict now
+    Py_DECREF(formats);  // holds its own, so ours must be dropped to avoid leaking the lists
+    return dtype_dict;
+}
+
+// Copies `view` into a new 1-D structured numpy array (new reference); throws pdal_error on failure.
+PyArrayObject* viewToNumpyArray(PointViewPtr view)
+{
+    PyObject* dtype_dict = buildNumpyDescriptor(view->layout());
+    PyArray_Descr *dtype = nullptr;
+    const int converted = PyArray_DescrConverter(dtype_dict, &dtype);
+    Py_XDECREF(dtype_dict);  // released before the check so the failure path does not leak it
+    if (converted == NPY_FAIL)
+        throw pdal_error("Unable to build numpy dtype");
+    // This is a 1 x size array.
+    npy_intp size = view->size();
+    PyArrayObject* array = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype,
+            1, &size, 0, nullptr, NPY_ARRAY_CARRAY, nullptr);
+    if (!array)
+        throw pdal_error("Unable to allocate numpy array");
+    // copy the data
+    DimTypeList types = view->dimTypes();
+    for (PointId idx = 0; idx < view->size(); idx++)
+        view->getPackedPoint(types, idx, (char *)PyArray_GETPTR1(array, idx));
+    return array;
+}
+
+// Converts a triangular mesh into a new 1-D structured numpy array with one {A, B, C} record per triangle.
+PyArrayObject* meshToNumpyArray(const TriangularMesh* mesh)
+{
+    // Build the numpy dtype dictionary {'names': ['A', 'B', 'C'], 'formats': ['u4', 'u4', 'u4']}
+    PyObject* names = PyList_New(3);
+    PyList_SetItem(names, 0, PyUnicode_FromString("A"));
+    PyList_SetItem(names, 1, PyUnicode_FromString("B"));
+    PyList_SetItem(names, 2, PyUnicode_FromString("C"));
+
+    PyObject* formats = PyList_New(3);
+    PyList_SetItem(formats, 0, PyUnicode_FromString("u4"));
+    PyList_SetItem(formats, 1, PyUnicode_FromString("u4"));
+    PyList_SetItem(formats, 2, PyUnicode_FromString("u4"));
+
+    PyObject* dtype_dict = PyDict_New();
+    PyDict_SetItemString(dtype_dict, "names", names);
+    PyDict_SetItemString(dtype_dict, "formats", formats);
+    // PyDict_SetItemString does not steal references, so drop ours now that the dict holds its own.
+    Py_DECREF(names);
+    Py_DECREF(formats);
+
+    PyArray_Descr *dtype = nullptr;
+    const int converted = PyArray_DescrConverter(dtype_dict, &dtype);
+    Py_XDECREF(dtype_dict);  // released before the check so the failure path does not leak it
+    if (converted == NPY_FAIL)
+        throw pdal_error("Unable to build numpy dtype");
+
+    // This is a 1 x size array; a null mesh produces an empty array.
+    npy_intp size = mesh ? mesh->size() : 0;
+    PyArrayObject* array = (PyArrayObject*)PyArray_NewFromDescr(&PyArray_Type, dtype,
+            1, &size, 0, nullptr, NPY_ARRAY_CARRAY, nullptr);
+    if (!array)
+        throw pdal_error("Unable to allocate numpy array");
+
+    // Each record is three consecutive 4-byte vertex indices.
+    for (PointId idx = 0; idx < size; idx++)
+    {
+        char* p = (char *)PyArray_GETPTR1(array, idx);
+        const Triangle& t = (*mesh)[idx];
+        uint32_t a = (uint32_t)t.m_a;
+        std::memcpy(p, &a, 4);
+        uint32_t b = (uint32_t)t.m_b;
+        std::memcpy(p + 4, &b, 4);
+        uint32_t c = (uint32_t)t.m_c;
+        std::memcpy(p + 8, &c,  4);
+    }
+    return array;
+}
+
+} // namespace python
+} // namespace pdal
diff --git a/src/pdal/PyPipeline.hpp b/src/pdal/PyPipeline.hpp
new file mode 100644
index 00000000..1eed023f
--- /dev/null
+++ b/src/pdal/PyPipeline.hpp
@@ -0,0 +1,102 @@
+/******************************************************************************
+* Copyright (c) 2016, Howard Butler (howard@hobu.co)
+*
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following
+* conditions are met:
+*
+*     * Redistributions of source code must retain the above copyright
+*       notice, this list of conditions and the following disclaimer.
+*     * Redistributions in binary form must reproduce the above copyright
+*       notice, this list of conditions and the following disclaimer in
+*       the documentation and/or other materials provided
+*       with the distribution.
+*     * Neither the name of Hobu, Inc. or Flaxen Geo Consulting nor the
+*       names of its contributors may be used to endorse or promote
+*       products derived from this software without specific prior
+*       written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+* OF SUCH DAMAGE.
+****************************************************************************/
+
+#pragma once
+
+#include "export.hpp"
+#include <pdal/PipelineManager.hpp>
+
+#define NPY_TARGET_VERSION NPY_1_22_API_VERSION
+#define NPY_NO_DEPRECATED_API NPY_1_22_API_VERSION
+
+#define NO_IMPORT_ARRAY
+#define PY_ARRAY_UNIQUE_SYMBOL PDAL_ARRAY_API
+
+#include <numpy/arrayobject.h>
+
+namespace pdal
+{
+namespace python
+{
+
+PyObject* buildNumpyDescriptor(PointLayoutPtr layout);  // new dict with 'names' and 'formats' lists for the layout
+PyArrayObject* viewToNumpyArray(PointViewPtr view);  // new 1-D structured array holding a copy of the view's points
+PyArrayObject* meshToNumpyArray(const TriangularMesh* mesh);  // new 1-D array of u4 fields A, B, C; empty when mesh is null
+
+class Array;
+// Wraps a pdal::PipelineManager built from pipeline JSON, optionally fed by numpy arrays, and captures its log output.
+class PDAL_EXPORT PipelineExecutor {
+public:
+    PipelineExecutor(std::string const& json, std::vector<std::shared_ptr<Array>> arrays, int level);  // level outside 0..8 throws
+    virtual ~PipelineExecutor() = default;
+
+    point_count_t execute(pdal::StringList allowedDims);
+    point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims);
+
+    const PointViewSet& views() const;  // throws pdal_error before execution
+    std::string getPipeline() const;
+    std::string getMetadata() const;  // throws pdal_error before execution
+    std::string getQuickInfo() const;
+    std::string getSchema() const;  // throws pdal_error before execution
+    std::string getSrsWKT2() const;
+    PipelineManager const& getManager() const { return m_manager; }
+    std::string getLog() const { return m_logStream.str(); }  // copy of everything written to the log stream so far
+
+protected:
+    virtual ConstPointTableRef pointTable() const { return m_manager.pointTable(); }
+
+    pdal::PipelineManager m_manager;
+    bool m_executed = false;  // set by execute() / executeStream()
+
+private:
+    void addArrayReaders(std::vector<std::shared_ptr<Array>> arrays);
+
+    std::stringstream m_logStream;  // NOTE(review): declared after m_manager, so destroyed first while the manager's log still points here — confirm nothing logs during teardown
+};
+// FixedPointTable that accumulates, across reset() calls, how many non-skipped points it has held.
+class CountPointTable : public FixedPointTable
+{
+public:
+    CountPointTable(point_count_t capacity) : FixedPointTable(capacity), m_count(0) {}
+    point_count_t count() const { return m_count; }  // running total gathered by reset()
+
+protected:
+    virtual void reset();  // adds the current non-skipped points to m_count, then resets the base table
+
+private:
+    point_count_t m_count;
+};
+
+} // namespace python
+} // namespace pdal
diff --git a/src/pdal/StreamableExecutor.cpp b/src/pdal/StreamableExecutor.cpp
new file mode 100644
index 00000000..5fa01931
--- /dev/null
+++ b/src/pdal/StreamableExecutor.cpp
@@ -0,0 +1,242 @@
+/******************************************************************************
+* Copyright (c) 2016, Howard Butler (howard@hobu.co)
+*
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following
+* conditions are met:
+*
+*     * Redistributions of source code must retain the above copyright
+*       notice, this list of conditions and the following disclaimer.
+*     * Redistributions in binary form must reproduce the above copyright
+*       notice, this list of conditions and the following disclaimer in
+*       the documentation and/or other materials provided
+*       with the distribution.
+*     * Neither the name of Hobu, Inc. or Flaxen Geo Consulting nor the
+*       names of its contributors may be used to endorse or promote
+*       products derived from this software without specific prior
+*       written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+* OF SUCH DAMAGE.
+****************************************************************************/
+
+#include "PyPipeline.hpp"
+#include "StreamableExecutor.hpp"
+
+#define NO_IMPORT_ARRAY
+#define PY_ARRAY_UNIQUE_SYMBOL PDAL_ARRAY_API
+
+#include <Python.h>
+#include <numpy/arrayobject.h>
+
+#include <pdal/Stage.hpp>
+#include <pdal/pdal_features.hpp>
+
+namespace pdal
+{
+namespace python
+{
+
+// PythonPointTable
+
+// Binds the base stream table to this object's own layout with the given
+// point limit. No dtype and no array exist yet; finalize() creates both.
+PythonPointTable::PythonPointTable(point_count_t limit, int prefetch)
+    : StreamPointTable(m_layout, limit)
+    , m_prefetch(prefetch)
+    , m_curArray(nullptr)
+    , m_dtype(nullptr)
+{
+}
+
+// Drops the references this table still holds on the dtype and on the array
+// currently being filled. The GIL is taken first because the destructor may
+// run on a thread that does not hold it.
+PythonPointTable::~PythonPointTable()
+{
+    const PyGILState_STATE state = PyGILState_Ensure();
+    Py_XDECREF(m_dtype);
+    Py_XDECREF(m_curArray);
+    PyGILState_Release(state);
+}
+
+// Finalizes the layout, converts it to a numpy dtype (stored in m_dtype) and
+// allocates the first output array.
+//
+// Throws pdal_error if the dtype cannot be built. The descriptor object and
+// the GIL state are always released before the exception leaves this
+// function, so a failure neither leaks the descriptor nor leaves the GIL
+// held by this thread.
+void PythonPointTable::finalize()
+{
+    BasePointTable::finalize();
+
+    auto gil = PyGILState_Ensure();
+
+    PyObject *dtype_dict = buildNumpyDescriptor(&m_layout);
+    // A null descriptor must not be handed to the converter.
+    const bool ok = dtype_dict &&
+        PyArray_DescrConverter(dtype_dict, &m_dtype) != NPY_FAIL;
+    Py_XDECREF(dtype_dict);
+    if (!ok)
+        PyErr_Clear(); // the failure is reported as a pdal_error instead
+    PyGILState_Release(gil);
+
+    if (!ok)
+        throw pdal_error("Unable to create numpy dtype");
+
+    py_createArray();
+}
+
+// Allocates a fresh one-dimensional array of capacity() elements with the
+// table's dtype and makes it the current array. Takes the GIL itself.
+void PythonPointTable::py_createArray()
+{
+    const PyGILState_STATE state = PyGILState_Ensure();
+
+    npy_intp length = capacity();
+    // PyArray_NewFromDescr steals a reference to the descriptor, so take an
+    // extra one to keep m_dtype usable for the next array.
+    Py_INCREF(m_dtype);
+    PyObject *fresh = PyArray_NewFromDescr(&PyArray_Type, m_dtype,
+        1, &length, 0, nullptr, NPY_ARRAY_CARRAY, nullptr);
+    m_curArray = (PyArrayObject *)fresh;
+
+    PyGILState_Release(state);
+}
+
+// Compacts the current array so that the non-skipped points occupy the first
+// `np` slots, then shrinks the array to `np` elements. Takes the GIL itself.
+//
+// np - number of non-skipped points in the current chunk.
+//
+// Throws pdal_error if an element cannot be copied or the resize fails; the
+// GIL is released and the Python error cleared before throwing.
+void PythonPointTable::py_resizeArray(point_count_t np)
+{
+    npy_intp sizes[1];
+    sizes[0] = static_cast<npy_intp>(np);
+    PyArray_Dims dims{ sizes, 1 };
+
+    auto gil = PyGILState_Ensure();
+    bool ok = true;
+
+    // Copy the non-skipped elements to the beginning.
+    npy_intp dest_idx = 0;
+    for (PointId src_idx = 0; ok && src_idx < numPoints(); src_idx++)
+    {
+        if (skip(src_idx))
+            continue;
+        if (src_idx != static_cast<PointId>(dest_idx))
+        {
+            PyObject* src_item = PyArray_GETITEM(m_curArray, (const char*) PyArray_GETPTR1(m_curArray, src_idx));
+            // GETITEM yields NULL on failure; SETITEM yields a negative value.
+            ok = src_item &&
+                PyArray_SETITEM(m_curArray, (char*) PyArray_GETPTR1(m_curArray, dest_idx), src_item) >= 0;
+            Py_XDECREF(src_item);
+        }
+        dest_idx++;
+    }
+
+    if (ok)
+    {
+        // PyArray_Resize returns a new reference to None on success and
+        // NULL on error; the reference has to be released either way.
+        PyObject* resized = PyArray_Resize(m_curArray, &dims, true, NPY_CORDER);
+        ok = (resized != nullptr);
+        Py_XDECREF(resized);
+    }
+
+    if (!ok)
+        PyErr_Clear(); // the failure is reported as a pdal_error instead
+    PyGILState_Release(gil);
+
+    if (!ok)
+        throw pdal_error("Unable to resize numpy array");
+}
+
+// Hands the chunk that was just filled to the consumer side: counts the
+// non-skipped points, compacts/shrinks the current array if some points were
+// skipped, queues it, and allocates a new current array.
+void PythonPointTable::reset()
+{
+    // Number of points in this chunk that were not skipped.
+    point_count_t np = 0;
+    for (PointId idx = 0; idx < numPoints(); idx++)
+        if (!skip(idx))
+            np++;
+
+    // A full chunk with nothing skipped needs no compaction; an empty chunk
+    // is not published at all (see below).
+    if (np && np != capacity())
+        py_resizeArray(np);
+
+    // After (possibly) queuing the array, this blocks while more than
+    // m_prefetch arrays are waiting to be fetched; fetchArray() signals
+    // m_consumedCv each time it takes one.
+    std::unique_lock<std::mutex> l(m_mutex);
+    {
+        // It's possible that this is called with 0 points processed, in which case
+        // we don't push the current array.
+        if (np)
+        {
+            m_arrays.push(m_curArray);
+            // Note: py_createArray() takes the GIL while m_mutex is held.
+            py_createArray();
+            m_producedCv.notify_one();
+        }
+        // m_prefetch is an int compared against an unsigned size: a negative
+        // value converts to a very large number, so this would never wait.
+        while (m_arrays.size() > m_prefetch)
+            m_consumedCv.wait(l);
+    }
+}
+
+// Currently a no-op: the intended body (setting the capacity to zero) is
+// commented out until the base class member becomes accessible.
+void PythonPointTable::disable()
+{
+    // TODO: uncomment the next line when/if StreamPointTable.m_capacity
+    // changes from private to protected
+    // m_capacity = 0;
+}
+
+// Signals end-of-stream by queuing a nullptr sentinel for fetchArray().
+//
+// The queue is shared with the consumer thread, so the push must happen
+// under m_mutex. Without the lock the push races with fetchArray()'s
+// front()/pop(), and the notification can slip in between the consumer's
+// empty() check and its wait(), leaving the consumer blocked forever.
+void PythonPointTable::done()
+{
+    {
+        std::unique_lock<std::mutex> l(m_mutex);
+        m_arrays.push(nullptr);
+    }
+    m_producedCv.notify_one();
+}
+
+// Blocks until the producer has queued something, removes the front entry
+// and returns it. A nullptr entry is the end-of-stream sentinel queued by
+// done(). The GIL is released while waiting and must be held by the caller.
+PyArrayObject *PythonPointTable::fetchArray()
+{
+    PyArrayObject *next = nullptr;
+
+    Py_BEGIN_ALLOW_THREADS
+    {
+        std::unique_lock<std::mutex> guard(m_mutex);
+        m_producedCv.wait(guard, [this]() { return !m_arrays.empty(); });
+
+        next = m_arrays.front();
+        m_arrays.pop();
+    }
+    Py_END_ALLOW_THREADS
+
+    // Let a producer waiting in reset() know that a slot was freed.
+    m_consumedCv.notify_one();
+    return next;
+}
+
+// Address of element `idx` inside the array currently being filled.
+char *PythonPointTable::getPoint(PointId idx)
+{
+    void *slot = PyArray_GETPTR1(m_curArray, idx);
+    return static_cast<char *>(slot);
+}
+
+
+// StreamableExecutor
+
+// Builds the executor and immediately starts a worker thread that streams
+// the pipeline into m_table. Any exception thrown by the streaming run is
+// captured in m_exc, and m_table.done() is always called afterwards so the
+// consumer sees the end-of-stream sentinel.
+//
+// chunkSize   - point limit handed to the stream table.
+// prefetch    - handed to the stream table (see PythonPointTable::reset).
+// allowedDims - when non-empty, applied to the table's layout via
+//               setAllowedDims before streaming starts.
+StreamableExecutor::StreamableExecutor(std::string const& json,
+                                       std::vector<std::shared_ptr<Array>> arrays,
+                                       int level,
+                                       point_count_t chunkSize,
+                                       int prefetch,
+                                       pdal::StringList allowedDims)
+    : PipelineExecutor(json, arrays, level)
+    , m_table(chunkSize, prefetch)
+    , m_exc(nullptr)
+{
+    if (!allowedDims.empty())
+        m_table.layout()->setAllowedDims(allowedDims);
+
+    auto worker = [this]()
+    {
+        try
+        {
+            m_manager.executeStream(m_table);
+        }
+        catch (...)
+        {
+            m_exc = std::current_exception();
+        }
+        m_table.done();
+    };
+    m_thread.reset(new std::thread(worker));
+}
+
+// Shuts the worker thread down. If the stream was not consumed to the end,
+// the remaining queued arrays are fetched and released until the nullptr
+// sentinel arrives; each fetch wakes a producer that may be waiting in
+// PythonPointTable::reset(), which lets the worker run to completion.
+StreamableExecutor::~StreamableExecutor()
+{
+    if (!m_executed)
+    {
+        // disable() is currently a no-op (see its TODO).
+        m_table.disable();
+        auto gil = PyGILState_Ensure();
+        // fetchArray() returns nullptr once the worker has called done().
+        while (PyArrayObject* arr = m_table.fetchArray())
+            Py_XDECREF(arr);
+        PyGILState_Release(gil);
+    }
+    // Release the GIL while joining: the worker takes the GIL itself when it
+    // creates or resizes arrays.
+    Py_BEGIN_ALLOW_THREADS
+    m_thread->join();
+    Py_END_ALLOW_THREADS
+}
+
+// Returns the next chunk produced by the worker thread, or nullptr once the
+// stream is exhausted (and on every call after that). The caller owns the
+// returned array reference.
+//
+// If the worker failed, its exception is rethrown when the end-of-stream
+// sentinel is received. The worker stores m_exc before it queues the
+// sentinel, so m_exc is only inspected at that point: checking it after
+// every chunk would read it while the worker may still be writing it, and
+// rethrowing with a chunk in hand would drop that chunk's reference.
+PyArrayObject *StreamableExecutor::executeNext()
+{
+    if (m_executed)
+        return nullptr;
+
+    PyArrayObject* arr = m_table.fetchArray();
+    if (arr == nullptr)
+    {
+        m_executed = true;
+        if (m_exc)
+            std::rethrow_exception(m_exc);
+    }
+    return arr;
+}
+
+} // namespace python
+} // namespace pdal
diff --git a/pdal/PyPipeline.hpp b/src/pdal/StreamableExecutor.hpp
similarity index 57%
rename from pdal/PyPipeline.hpp
rename to src/pdal/StreamableExecutor.hpp
index a1ddc30b..f565c8ee 100644
--- a/pdal/PyPipeline.hpp
+++ b/src/pdal/StreamableExecutor.hpp
@@ -34,62 +34,66 @@
 
 #pragma once
 
-#include <pdal/PipelineManager.hpp>
-#include <pdal/PipelineWriter.hpp>
-#include <pdal/util/FileUtils.hpp>
-#include <pdal/PipelineExecutor.hpp>
+#include <condition_variable>
+#include <thread>
 
-#include <string>
-#include <sstream>
-#include <memory>
+#include "PyPipeline.hpp"
 
 namespace pdal
 {
 namespace python
 {
 
-class Array;
-
-class python_error : public std::runtime_error
+class PythonPointTable : public StreamPointTable
 {
 public:
-    inline python_error(std::string const& msg) : std::runtime_error(msg)
-        {}
+    PythonPointTable(point_count_t size, int prefetch);
+    ~PythonPointTable();
+
+    virtual void finalize();
+    void disable();
+    void done();
+    PyArrayObject *fetchArray();
+
+protected:
+    virtual void reset();
+    virtual char *getPoint(PointId idx);
+
+private:
+    // All functions starting with py_ call Python things that need the GIL locked.
+    void py_createArray();
+    void py_resizeArray(point_count_t np);
+
+    int m_prefetch;
+    PointLayout m_layout;
+    PyArrayObject *m_curArray;
+    PyArray_Descr *m_dtype;
+    std::mutex m_mutex;
+    std::condition_variable m_producedCv;
+    std::condition_variable m_consumedCv;
+    std::queue<PyArrayObject *> m_arrays;
 };
 
-class Pipeline
+class StreamableExecutor : public PipelineExecutor
 {
 public:
-    Pipeline(std::string const& json);
-    Pipeline(std::string const& json,
-        std::vector<pdal::python::Array*> arrays);
-    ~Pipeline();
+    StreamableExecutor(std::string const& json,
+                       std::vector<std::shared_ptr<Array>> arrays,
+                       int level,
+                       point_count_t chunkSize,
+                       int prefetch,
+                       pdal::StringList allowedDim);
+    ~StreamableExecutor();
 
-    int64_t execute();
-    bool validate();
-    inline std::string getPipeline() const
-    {
-        return m_executor->getPipeline();
-    }
-    inline std::string getMetadata() const
-    {
-        return m_executor->getMetadata();
-    }
-    inline std::string getSchema() const
-    {
-        return m_executor->getSchema();
-    }
-    inline std::string getLog() const
-    {
-        return m_executor->getLog();
-    }
-    std::vector<pdal::python::Array *> getArrays() const;
-
-    void setLogLevel(int level);
-    int getLogLevel() const;
+    MetadataNode getMetadata() { return m_table.metadata(); }
+    PyArrayObject* executeNext();
 
 private:
-    std::shared_ptr<pdal::PipelineExecutor> m_executor;
+    ConstPointTableRef pointTable() const { return m_table; }
+
+    PythonPointTable m_table;
+    std::unique_ptr<std::thread> m_thread;
+    std::exception_ptr m_exc;
 };
 
 } // namespace python
diff --git a/src/pdal/__init__.py b/src/pdal/__init__.py
new file mode 100644
index 00000000..c67ab5e3
--- /dev/null
+++ b/src/pdal/__init__.py
@@ -0,0 +1,12 @@
+# Public names of the package; "dimensions" and "info" are assigned below.
+__all__ = ["Pipeline", "Stage", "Reader", "Filter", "Writer", "dimensions", "info"]
+__version__ = '3.5.3'
+
+from . import libpdalpython
+from .drivers import inject_pdal_drivers
+from .pipeline import Filter, Pipeline, Reader, Stage, Writer
+
+# Attach the per-driver factory functions to the stage classes at import time.
+inject_pdal_drivers()
+# Both values are computed once, when the package is imported.
+dimensions = libpdalpython.getDimensions()
+info = libpdalpython.getInfo()
+
+# Remove the helper names from the package namespace again.
+del inject_pdal_drivers, libpdalpython
diff --git a/src/pdal/__main__.py b/src/pdal/__main__.py
new file mode 100644
index 00000000..4569e522
--- /dev/null
+++ b/src/pdal/__main__.py
@@ -0,0 +1,79 @@
+import sys
+import os
+import pathlib
+
+import sysconfig
+
+import argparse
+
+import pdal
+
+from . import __version__
+
+__all__ = ["main"]
+
+
+def __dir__() -> list[str]:
+    """Return the module's public names, i.e. the contents of ``__all__``."""
+    return __all__
+
+
+def print_driver_path(args):
+    if 'PDAL_DRIVER_PATH' in os.environ:
+        print (os.environ['PDAL_DRIVER_PATH'])
+
+def print_plugin_path(args):
+    purelib = sysconfig.get_paths()["purelib"]
+
+    if sys.platform == "linux" or sys.platform == "linux2":
+        suffix = 'so'
+        purelib = purelib + os.path.sep + "pdal"
+    elif sys.platform == "darwin":
+        suffix = 'dylib'
+        purelib = purelib + os.path.sep + "pdal"
+    elif sys.platform == "win32":
+        suffix = 'dll'
+        purelib = purelib + os.path.sep + "bin"
+
+    for f in pathlib.Path(purelib).glob(f'*.{suffix}'):
+        if 'pdal' in str(f.name):
+            if 'numpy' in str(f.name) or 'python' in str(f.name):
+                print (purelib)
+                return # we are done
+
+def print_version(args):
+    info = pdal.drivers.libpdalpython.getInfo()
+    pdal_version = info.version
+    plugin = info.plugin
+    debug = info.debug
+
+    line = '----------------------------------------------------------------------------------------------------------------------------\n'
+    version = f'PDAL version {pdal_version}\nPython bindings version {__version__}\n'
+    driver_path = 'PDAL_DRIVER_PATH not set!'
+    if 'PDAL_DRIVER_PATH' in os.environ:
+        driver_path = os.environ['PDAL_DRIVER_PATH']
+    plugin = f"Environment-set PDAL_DRIVER_PATH: {driver_path}"
+    output = f'{line}{version}{plugin}\n{line}\n{debug}'
+    print (output)
+
+
+def main() -> None:
+    header = f"PDAL Python bindings {__version__} on Python {sys.version}"
+
+    parser = argparse.ArgumentParser(description=header)
+    parser.add_argument('--pdal-driver-path',  action='store_true',
+                        help='print PDAL_DRIVER_PATH including Python plugin locations')
+    parser.add_argument('--pdal-plugin-path',  action='store_true',
+                        help='print location of PDAL Python plugins')
+
+    args = parser.parse_args()
+
+    if args.pdal_driver_path:
+        print_driver_path(args)
+    elif args.pdal_plugin_path:
+        print_plugin_path(args)
+    else:
+        print_version(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/pdal/drivers.py b/src/pdal/drivers.py
new file mode 100644
index 00000000..78d3dbb7
--- /dev/null
+++ b/src/pdal/drivers.py
@@ -0,0 +1,84 @@
+import json
+import subprocess
+from dataclasses import dataclass, field
+from typing import Callable, ClassVar, FrozenSet, Mapping, Optional, Sequence, Type
+
+from .pipeline import Filter, Reader, Stage, Writer
+from . import libpdalpython
+
+import shlex
+
+StreamableTypes: FrozenSet
+
+
+@dataclass
+class Option:
+    name: str
+    description: str
+    default: Optional[str] = None
+
+    def __repr__(self) -> str:
+        if self.default is not None:
+            return f"{self.name}={self.default!r}: {self.description}"
+        else:
+            return f"{self.name}: {self.description}"
+
+
+@dataclass
+class Driver:
+    name: str
+    short_name: str = field(init=False)
+    type: Type[Stage] = field(init=False)
+    description: str
+    options: Sequence[Option]
+
+    def __post_init__(self) -> None:
+        prefix, _, suffix = self.name.partition(".")
+        self.type = self._prefix_to_type[prefix]
+        self.short_name = suffix
+
+    @property
+    def factory(self) -> Callable[..., Stage]:
+        if self.options and self.options[0].name == "filename":
+            factory = lambda filename, **kwargs: self.type(
+                filename=filename, type=self.name, **kwargs
+            )
+        else:
+            factory = lambda **kwargs: self.type(type=self.name, **kwargs)
+        factory.__name__ = self.short_name
+        factory.__qualname__ = f"{self.type.__name__}.{self.short_name}"
+        factory.__module__ = self.type.__module__
+        factory.__doc__ = self.description
+        if self.options:
+            factory.__doc__ += "\n\n"
+            factory.__doc__ += "\n".join(map(repr, self.options))
+        return factory
+
+    _prefix_to_type: ClassVar[Mapping[str, Type[Stage]]] = {
+        "readers": Reader,
+        "filters": Filter,
+        "writers": Writer,
+    }
+
+
+def inject_pdal_drivers() -> None:
+
+    drivers = libpdalpython.getDrivers()
+    options = libpdalpython.getOptions()
+
+    streamable = []
+    for d in drivers:
+        name = d["name"]
+        d_options = [Option(**option_dict) for option_dict in (options.get(name) or ())]
+        # move filename option first
+        try:
+            i = next(i for i, opt in enumerate(d_options) if opt.name == "filename")
+            d_options.insert(0, d_options.pop(i))
+        except StopIteration:
+            pass
+        driver = Driver(name, d["description"], d_options)
+        setattr(driver.type, driver.short_name, staticmethod(driver.factory))
+        if d["streamable"]:
+            streamable.append(driver.name)
+    global StreamableTypes
+    StreamableTypes = frozenset(streamable)
diff --git a/src/pdal/export.hpp b/src/pdal/export.hpp
new file mode 100644
index 00000000..5a6c9aea
--- /dev/null
+++ b/src/pdal/export.hpp
@@ -0,0 +1,44 @@
+/******************************************************************************
+* Copyright (c) 2025, Hobu Inc. (info@hobu.co)
+*
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following
+* conditions are met:
+*
+*     * Redistributions of source code must retain the above copyright
+*       notice, this list of conditions and the following disclaimer.
+*     * Redistributions in binary form must reproduce the above copyright
+*       notice, this list of conditions and the following disclaimer in
+*       the documentation and/or other materials provided
+*       with the distribution.
+*     * Neither the name of Hobu, Inc. nor the
+*       names of its contributors may be used to endorse or promote
+*       products derived from this software without specific prior
+*       written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
+* OF SUCH DAMAGE.
+****************************************************************************/
+
+
+#include <pdal/util/pdal_util_export.hpp>
+
+// Make PDAL_EXPORT and PDAL_DLL interchangeable: whichever of the two is
+// still undefined at this point is defined in terms of the other.
+// NOTE(review): presumably needed because the PDAL header included above
+// provides only one of the two names, depending on the PDAL release --
+// confirm. If neither were defined, the two macros would only refer to each
+// other and would not expand to a usable export attribute.
+#ifndef PDAL_EXPORT
+#   define PDAL_EXPORT  PDAL_DLL
+#endif
+
+#ifndef PDAL_DLL
+#   define PDAL_DLL     PDAL_EXPORT
+#endif
diff --git a/src/pdal/libpdalpython.cpp b/src/pdal/libpdalpython.cpp
new file mode 100644
index 00000000..09118fbf
--- /dev/null
+++ b/src/pdal/libpdalpython.cpp
@@ -0,0 +1,353 @@
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+#include <pybind11/numpy.h>
+#include <pybind11/functional.h>
+#include <pybind11/stl/filesystem.h>
+#include <iostream>
+
+#include <pdal/pdal_config.hpp>
+#include <pdal/StageFactory.hpp>
+
+#define NPY_TARGET_VERSION NPY_1_22_API_VERSION
+#define NPY_NO_DEPRECATED_API NPY_1_22_API_VERSION
+
+#define PY_ARRAY_UNIQUE_SYMBOL PDAL_ARRAY_API
+
+#include <numpy/arrayobject.h>
+
+#include "PyArray.hpp"
+#include "PyDimension.hpp"
+#include "PyPipeline.hpp"
+#include "StreamableExecutor.hpp"
+
+namespace py = pybind11;
+
+namespace pdal {
+    using namespace py::literals;
+
+    // Returns a types.SimpleNamespace with the values reported by
+    // pdal::Config: version string, major/minor/patch numbers, debug
+    // information, sha1 and plugin install path.
+    py::object getInfo() {
+        py::object makeNamespace =
+            py::module_::import("types").attr("SimpleNamespace");
+
+        return makeNamespace(
+            "version"_a = pdal::Config::versionString(),
+            "major"_a = pdal::Config::versionMajor(),
+            "minor"_a = pdal::Config::versionMinor(),
+            "patch"_a = pdal::Config::versionPatch(),
+            "debug"_a = pdal::Config::debugInformation(),
+            "sha1"_a = pdal::Config::sha1(),
+            "plugin"_a = pdal::Config::pluginInstallPath());
+    };
+
+   // Returns one dict per registered stage with the keys "name",
+   // "description", "streamable" and "extensions".
+   //
+   // Each stage is instantiated only to ask whether it is streamable and is
+   // destroyed again right away. The previously unused link lookup and the
+   // per-iteration copy of the stage name have been removed.
+   std::vector<py::dict> getDrivers() {
+        std::vector<py::dict> drivers;
+
+        pdal::StageFactory f(false);
+        pdal::PluginManager<pdal::Stage>::loadAll();
+        pdal::StringList stages = pdal::PluginManager<pdal::Stage>::names();
+
+        pdal::StageExtensions& extensions = pdal::PluginManager<pdal::Stage>::extensions();
+        for (const auto& name : stages)
+        {
+            pdal::Stage *s = f.createStage(name);
+            const bool streamable = s->pipelineStreamable();
+            f.destroyStage(s);
+
+            std::string description = pdal::PluginManager<Stage>::description(name);
+            std::vector<std::string> extension_names = extensions.extensions(name);
+
+            py::dict d(
+                "name"_a=name,
+                "description"_a=description,
+                "streamable"_a=streamable,
+                "extensions"_a=extension_names
+            );
+            drivers.push_back(std::move(d));
+        }
+        return drivers;
+
+    };
+
+   // Returns a dict that maps each stage name to the JSON-decoded dump of
+   // that stage's program arguments. Stages whose dump cannot be parsed as
+   // JSON are reported on stderr and left out of the result.
+   py::object getOptions() {
+        py::object json = py::module_::import("json");
+        py::dict stageOptions;
+
+        pdal::StageFactory f;
+        pdal::PluginManager<pdal::Stage>::loadAll();
+        pdal::StringList stages = pdal::PluginManager<pdal::Stage>::names();
+
+        for (const auto& name : stages)
+        {
+            std::ostringstream ostr;
+            {
+                // Dump the arguments and destroy the stage before parsing,
+                // so the stage is released on the parse-failure path too
+                // (the old code skipped destroyStage() when it hit
+                // `continue`).
+                pdal::Stage *s = f.createStage(name);
+                pdal::ProgramArgs args;
+                s->addAllArgs(args);
+                args.dump3(ostr);
+                f.destroyStage(s);
+            }
+
+            // The former `pystring.attr("strip");` only looked the method up
+            // without calling it, so it had no effect and has been dropped.
+            py::str pystring(ostr.str());
+
+            py::object j;
+
+            try {
+                j = json.attr("loads")(pystring);
+            } catch (py::error_already_set &) {
+                std::cerr << "failed:" << name << "'" << ostr.str() << "'" <<std::endl;
+                continue; // skip this one because we can't parse it
+            }
+
+            stageOptions[pybind11::cast(name)] = std::move(j);
+       }
+        return stageOptions;
+
+    };
+
+    // Returns one dict per entry of getValidDimensions() with the keys
+    // "name", "description" and "dtype"; the dtype is a numpy.dtype built
+    // from the dimension's type followed by its size.
+    std::vector<py::dict> getDimensions() {
+        py::object numpy = py::module_::import("numpy");
+        py::object makeDtype = numpy.attr("dtype");
+
+        std::vector<py::dict> result;
+        for (const auto& dim: getValidDimensions())
+        {
+            result.push_back(py::dict(
+                "name"_a=dim.name,
+                "description"_a=dim.description,
+                "dtype"_a=makeDtype(dim.type + std::to_string(dim.size))
+            ));
+        }
+        return result;
+    };
+
+    // Name of the reader driver that StageFactory infers for the given path.
+    std::string getReaderDriver(std::filesystem::path const& p)
+    {
+        const std::string filename = p.string();
+        return StageFactory::inferReaderDriver(filename);
+    }
+
+    // Name of the writer driver that StageFactory infers for the given path.
+    std::string getWriterDriver(std::filesystem::path const& p)
+    {
+        const std::string filename = p.string();
+        return StageFactory::inferWriterDriver(filename);
+    }
+
+    using pdal::python::PipelineExecutor;
+    using pdal::python::StreamableExecutor;
+
+    // Python-facing wrapper around StreamableExecutor: converts the schema
+    // and metadata to Python objects via json.loads and turns the
+    // end-of-stream nullptr into StopIteration.
+    class PipelineIterator : public StreamableExecutor {
+    public:
+        using StreamableExecutor::StreamableExecutor;
+
+        // Schema string of the base class, decoded with json.loads.
+        py::object getSchema() {
+            py::object loads = py::module_::import("json").attr("loads");
+            return loads(StreamableExecutor::getSchema());
+        }
+
+        // Next chunk as a numpy array; ownership of the reference returned
+        // by the base class is transferred to the py::array.
+        py::array executeNext() {
+            PyArrayObject* chunk = StreamableExecutor::executeNext();
+            if (chunk == nullptr)
+                throw py::stop_iteration();
+
+            return py::reinterpret_steal<py::array>((PyObject*)chunk);
+        }
+
+        // Metadata tree, re-rooted under "metadata", serialised to JSON and
+        // decoded with json.loads. Bytes that are not valid UTF-8 are
+        // dropped while decoding.
+        py::object getMetadata() {
+            py::object json = py::module_::import("json");
+
+            std::stringstream buffer;
+            MetadataNode root = (StreamableExecutor::getMetadata()).clone("metadata");
+            pdal::Utils::toJSON(root, buffer);
+
+            py::bytes raw(buffer.str());
+            py::str text(raw.attr("decode")("utf-8", "ignore"));
+
+            return json.attr("loads")(text);
+        }
+
+    };
+
+    // Base class of the Python-side Pipeline. It lazily creates a
+    // PipelineExecutor from the JSON returned by the Python "toJSON" method,
+    // the configured inputs and the log level, and drops that executor
+    // whenever inputs or log level change.
+    class Pipeline {
+    public:
+        // Runs the pipeline and returns the point count. The GIL is released
+        // for the duration of the run; getExecutor() re-acquires it
+        // internally while it builds the executor.
+        point_count_t execute(pdal::StringList allowedDims) {
+            point_count_t response(0);
+            {
+                py::gil_scoped_release release;
+                response = getExecutor()->execute(allowedDims);
+            }
+            return response;
+        }
+
+        // Streaming variant of execute(); same GIL handling.
+        point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims) {
+            point_count_t response(0);
+            {
+                py::gil_scoped_release release;
+                response = getExecutor()->executeStream(streamLimit, allowedDims);
+            }
+            return response;
+        }
+
+        // Creates an independent iterator over the pipeline's output chunks.
+        std::unique_ptr<PipelineIterator> iterator(int chunk_size, int prefetch, pdal::StringList allowedDims) {
+            return std::unique_ptr<PipelineIterator>(new PipelineIterator(
+                getJson(), _inputs, _loglevel, chunk_size, prefetch, allowedDims
+            ));
+        }
+
+        // Replaces the input arrays and drops the current executor.
+        void setInputs(const std::vector<py::object>& inputs) {
+            _inputs.clear();
+            for (const auto& input_obj: inputs) {
+                if (py::isinstance<py::array>(input_obj)) {
+                    // Backward compatibility for accepting list of numpy arrays
+                    auto ndarray = input_obj.cast<py::array>();
+                    _inputs.push_back(std::make_shared<pdal::python::Array>((PyArrayObject*)ndarray.ptr()));
+                } else {
+                    // Now expected to be a list of pairs: (numpy array, <optional> stream handler)
+                    auto input = input_obj.cast<std::pair<py::array, pdal::python::ArrayStreamHandler>>();
+                    _inputs.push_back(std::make_shared<pdal::python::Array>(
+                            (PyArrayObject*)input.first.ptr(),
+                            input.second ?
+                                std::make_shared<pdal::python::ArrayStreamHandler>(input.second)
+                                : nullptr));
+                }
+            }
+            delExecutor();
+        }
+
+        int getLoglevel() { return _loglevel; }
+
+        // Changing the level drops the executor so the next use rebuilds it.
+        void setLogLevel(int level) { _loglevel = level; delExecutor(); }
+
+        std::string getLog() { return getExecutor()->getLog(); }
+
+        std::string getPipeline() { return getExecutor()->getPipeline(); }
+        std::string getSrsWKT2() { return getExecutor()->getSrsWKT2(); }
+
+        // Quick-info string decoded with json.loads; the GIL is released
+        // while the executor produces it. Bytes that are not valid UTF-8 are
+        // dropped while decoding.
+        py::object getQuickInfo() {
+            py::object json = py::module_::import("json");
+
+            std::string response;
+            {
+                py::gil_scoped_release release;
+                response = getExecutor()->getQuickInfo();
+            }
+            py::bytes pybytes(response);
+
+            // The former `pystring.attr("strip");` only looked the method up
+            // without calling it, so it had no effect and has been dropped.
+            py::str pystring ( pybytes.attr("decode")("utf-8", "ignore"));
+
+            py::object j;
+            j = json.attr("loads")(pystring);
+
+            return j;
+
+        }
+
+        // Metadata string decoded with json.loads (invalid UTF-8 dropped).
+        py::object getMetadata() {
+            py::object json = py::module_::import("json");
+
+            py::bytes pybytes(getExecutor()->getMetadata());
+            py::str pystring ( pybytes.attr("decode")("utf-8", "ignore"));
+
+            py::object j;
+            j = json.attr("loads")(pystring);
+
+            return j;
+
+        }
+
+        py::object getSchema() {
+            return py::module_::import("json").attr("loads")(getExecutor()->getSchema());
+        }
+
+        // One numpy array per point view of the executor.
+        std::vector<py::array> getArrays() {
+            std::vector<py::array> output;
+            for (const auto &view: getExecutor()->views()) {
+                PyArrayObject* arr(pdal::python::viewToNumpyArray(view));
+                output.push_back(py::reinterpret_steal<py::array>((PyObject*)arr));
+            }
+            return output;
+        }
+
+        // One numpy array per point view, built from the view's mesh.
+        std::vector<py::array> getMeshes() {
+            std::vector<py::array> output;
+            for (const auto &view: getExecutor()->views()) {
+                PyArrayObject* arr(pdal::python::meshToNumpyArray(view->mesh()));
+                output.push_back(py::reinterpret_steal<py::array>((PyObject*)arr));
+            }
+            return output;
+        }
+
+        // Dispatches to the Python-side override named "toJSON".
+        std::string getJson() const {
+            PYBIND11_OVERRIDE_PURE_NAME(std::string, Pipeline, "toJSON", getJson);
+        }
+
+        bool hasInputs() { return !_inputs.empty(); }
+
+        void copyInputs(const Pipeline& other) { _inputs = other._inputs; }
+
+        void delExecutor() { _executor.reset(); }
+
+        PipelineExecutor* getExecutor() {
+            // We need to acquire the GIL before we create the executor
+            // because this method does Python init stuff but pybind11 doesn't
+            // automatically encapsulate it with a gil_scoped_acquire like it
+            // does for all of the other methods it knows about
+            py::gil_scoped_acquire acquire;
+            if (!_executor)
+                _executor.reset(new PipelineExecutor(getJson(), _inputs, _loglevel));
+            return _executor.get();
+        }
+
+    private:
+        std::unique_ptr<PipelineExecutor> _executor;
+        std::vector<std::shared_ptr<pdal::python::Array>> _inputs;
+        // Initialised so that reading the level before it was ever set is
+        // well defined. NOTE(review): 0 is presumably PDAL's error level,
+        // the value the Python wrapper maps logging.ERROR to -- confirm.
+        int _loglevel = 0;
+    };
+
+
+
+    // Module initialisation: checks the PDAL version, initialises the NumPy
+    // C API and registers the PipelineIterator and Pipeline classes plus the
+    // module-level helper functions.
+    PYBIND11_MODULE(libpdalpython, m)
+    {
+        // Refuse to load against a PDAL older than 2.7 before anything is
+        // registered.
+        if (pdal::Config::versionMajor() < 2 ||
+            (pdal::Config::versionMajor() == 2 && pdal::Config::versionMinor() < 7))
+            throw pybind11::import_error("PDAL version must be >= 2.7");
+
+        // _import_array() returns a negative value and sets a Python error
+        // when NumPy cannot be initialised; propagate that error instead of
+        // continuing with an unusable array API.
+        if (_import_array() < 0)
+            throw py::error_already_set();
+
+    py::class_<PipelineIterator>(m, "PipelineIterator")
+        .def("__iter__", [](PipelineIterator &it) -> PipelineIterator& { return it; })
+        .def("__next__", &PipelineIterator::executeNext)
+        .def_property_readonly("log", &PipelineIterator::getLog)
+        .def_property_readonly("schema", &PipelineIterator::getSchema)
+        .def_property_readonly("srswkt2", &PipelineIterator::getSrsWKT2)
+        .def_property_readonly("pipeline", &PipelineIterator::getPipeline)
+        .def_property_readonly("metadata", &PipelineIterator::getMetadata);
+
+
+    py::class_<Pipeline>(m, "Pipeline")
+        .def(py::init<>())
+        .def("execute", &Pipeline::execute, py::arg("allowed_dims") =py::list())
+        .def("execute_streaming", &Pipeline::executeStream, "chunk_size"_a=10000, py::arg("allowed_dims") =py::list())
+        .def("iterator", &Pipeline::iterator, "chunk_size"_a=10000, "prefetch"_a=0, py::arg("allowed_dims") =py::list())
+        .def_property("inputs", nullptr, &Pipeline::setInputs)
+        .def_property("loglevel", &Pipeline::getLoglevel, &Pipeline::setLogLevel)
+        .def_property_readonly("log", &Pipeline::getLog)
+        .def_property_readonly("schema", &Pipeline::getSchema)
+        .def_property_readonly("srswkt2", &Pipeline::getSrsWKT2)
+        .def_property_readonly("pipeline", &Pipeline::getPipeline)
+        .def_property_readonly("quickinfo", &Pipeline::getQuickInfo)
+        .def_property_readonly("metadata", &Pipeline::getMetadata)
+        .def_property_readonly("arrays", &Pipeline::getArrays)
+        .def_property_readonly("meshes", &Pipeline::getMeshes)
+        .def_property_readonly("_has_inputs", &Pipeline::hasInputs)
+        .def("_copy_inputs", &Pipeline::copyInputs)
+        .def("toJSON", &Pipeline::getJson)
+        .def("_del_executor", &Pipeline::delExecutor);
+    m.def("getInfo", &getInfo);
+    m.def("getDrivers", &getDrivers);
+    m.def("getOptions", &getOptions);
+    m.def("getDimensions", &getDimensions);
+    m.def("infer_reader_driver", &getReaderDriver);
+    m.def("infer_writer_driver", &getWriterDriver);
+    };
+
+}; // namespace pdal
diff --git a/src/pdal/pipeline.py b/src/pdal/pipeline.py
new file mode 100644
index 00000000..60a181c0
--- /dev/null
+++ b/src/pdal/pipeline.py
@@ -0,0 +1,300 @@
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any, Container, Dict, Iterator, List, Optional, Sequence, Union, cast, Callable
+
+import numpy as np
+import pathlib
+
+try:
+    from meshio import Mesh
+except ModuleNotFoundError:  # pragma: no cover
+    Mesh = None
+
+try:
+    from pandas import DataFrame
+except ModuleNotFoundError:  # pragma: no cover
+    DataFrame = None
+
+try:
+    from geopandas import GeoDataFrame, points_from_xy
+except ModuleNotFoundError:  # pragma: no cover
+    GeoDataFrame = points_from_xy = None
+
+from . import drivers, libpdalpython
+
+LogLevelToPDAL = {  # Python `logging` level -> integer level handed to libpdalpython
+    logging.ERROR: 0,
+    logging.WARNING: 1,
+    logging.INFO: 2,
+    logging.DEBUG: 8,  # pdal::LogLevel::Debug5
+}
+LogLevelFromPDAL = {v: k for k, v in LogLevelToPDAL.items()}  # inverse table read by the loglevel getter
+
+
+class Pipeline(libpdalpython.Pipeline):  # stage-based front end to the native pipeline binding
+    def __init__(
+        self,
+        spec: Union[None, str, Sequence[Stage]] = None,
+        arrays: Sequence[np.ndarray] = (),
+        loglevel: int = logging.ERROR,
+        json: Optional[str] = None,
+        dataframes: Sequence[DataFrame] = (),
+        stream_handlers: Sequence[Callable[[], int]] = (),
+    ):
+        """Build from ``spec`` (JSON text or stages); ``json`` is an alias for ``spec``."""
+        if json:
+            if spec:
+                raise ValueError("provide 'spec' or 'json' arguments, not both")
+            spec = json
+
+        # Convert our data frames to Numpy Structured Arrays (these replace `arrays`)
+        if dataframes:
+            arrays = [
+                (df.drop(columns=["geometry"]) if "geometry" in df.columns else df).to_records()
+                for df in dataframes
+            ]
+
+        super().__init__()
+        self._stages: List[Stage] = []
+        if spec:
+            stages = _parse_stages(spec) if isinstance(spec, str) else spec
+            for stage in stages:
+                self |= stage
+
+        if stream_handlers:
+            if len(stream_handlers) != len(arrays):
+                raise RuntimeError("stream_handlers must match the number of specified input arrays / dataframes")
+            self.inputs = [(a, h) for a, h in zip(arrays, stream_handlers)]
+        else:
+            self.inputs = [(a, None) for a in arrays]
+
+        self.loglevel = loglevel
+
+    def __getstate__(self):
+        return self.pipeline  # pickle only the pipeline description
+
+    def __setstate__(self, state):
+        self.__init__(state)
+
+    @property
+    def stages(self) -> List[Stage]:
+        """A copy of the stage list, so callers cannot mutate the pipeline through it."""
+        return list(self._stages)
+
+    @property
+    def streamable(self) -> bool:
+        """True when every stage reports itself as streamable."""
+        return all(stage.streamable for stage in self._stages)
+
+    @property
+    def loglevel(self) -> int:
+        """The current level expressed as a Python ``logging`` constant."""
+        return LogLevelFromPDAL[super().loglevel]
+
+    @loglevel.setter
+    def loglevel(self, value: int) -> None:
+        try:
+            loglevel = LogLevelToPDAL[value]
+        except KeyError:
+            raise ValueError(f"Invalid level {value!r}") from None  # hide the dict lookup
+        # super() property setter is not supported
+        libpdalpython.Pipeline.loglevel.__set__(self, loglevel)
+
+    def __ior__(self, other: Union[Stage, Pipeline]) -> Pipeline:
+        """Append a stage, or all stages of another pipeline, in place."""
+        if isinstance(other, Stage):
+            self._stages.append(other)
+        elif isinstance(other, Pipeline):
+            if self._stages and other._has_inputs:
+                raise ValueError("A pipeline with inputs cannot follow another pipeline")
+            self._stages.extend(other._stages)
+        else:
+            raise TypeError(f"Expected Stage or Pipeline, not {other}")
+        self._del_executor()
+        return self
+
+    def __or__(self, other: Union[Stage, Pipeline]) -> Pipeline:
+        """Return a new pipeline: a copy of this one with ``other`` appended."""
+        new = self.__copy__()
+        new |= other
+        return new
+
+    def __copy__(self) -> Pipeline:
+        clone = self.__class__(loglevel=self.loglevel)
+        clone._copy_inputs(self)
+        clone |= self
+        return clone
+
+    def get_meshio(self, idx: int) -> Optional[Mesh]:
+        """Return output ``idx`` as a meshio triangle ``Mesh``, or None if it has no mesh."""
+        if Mesh is None:  # pragma: no cover
+            raise RuntimeError(
+                "The get_meshio function can only be used if you have installed meshio. "
+                "Try pip install meshio"
+            )
+        array = self.arrays[idx]
+        mesh = self.meshes[idx]
+        if len(mesh) == 0:
+            return None
+        return Mesh(
+            np.stack((array["X"], array["Y"], array["Z"]), 1),
+            [("triangle", np.stack((mesh["A"], mesh["B"], mesh["C"]), 1))],
+        )
+
+    def get_dataframe(self, idx: int) -> Optional[DataFrame]:
+        """Return output array ``idx`` wrapped in a pandas ``DataFrame``."""
+        if DataFrame is None:
+            raise RuntimeError("Pandas support requires Pandas to be installed")
+        return DataFrame(self.arrays[idx])
+
+    def get_geodataframe(self, idx: int, xyz: bool=False, crs: Any=None) -> Optional[GeoDataFrame]:
+        """Return output ``idx`` as a ``GeoDataFrame`` of XY points (XYZ when ``xyz``)."""
+        if GeoDataFrame is None:
+            raise RuntimeError("GeoPandas support requires GeoPandas to be installed")
+        df = DataFrame(self.arrays[idx])
+        coords = [df["X"], df["Y"], df["Z"]] if xyz else [df["X"], df["Y"]]
+        return GeoDataFrame(df, geometry=points_from_xy(*coords), crs=crs)
+
+    def _get_json(self) -> str:
+        return self.toJSON()
+
+    def toJSON(self) -> str:
+        """Serialize the stages to a JSON list of per-stage option dicts."""
+        options_list = []
+        stage2tag: Dict[Stage, str] = {}
+        stages = self._stages
+        if all(isinstance(stage, Reader) for stage in stages):
+            stages = [*stages, Filter.merge()]  # reader-only pipeline: append a merge filter
+        for stage in stages:
+            stage2tag[stage] = stage.tag or _generate_tag(stage, stage2tag.values())
+            options = stage.options
+            for option in options:
+                if isinstance(options[option], pathlib.Path):
+                    options[option] = str(options[option])
+            options["tag"] = stage2tag[stage]
+            options["type"] = stage.type
+            inputs = _get_input_tags(stage, stage2tag)
+            if inputs:
+                options["inputs"] = inputs
+            options_list.append(options)
+        return json.dumps(options_list)
+
+
+class Stage:  # One pipeline stage, held as a plain dict of its options.
+    def __init__(self, **options: Any):
+        self._options = options
+
+    @property
+    def type(self) -> str:
+        return cast(str, self._options["type"])  # KeyError when no "type" option was given
+
+    @property
+    def streamable(self) -> bool:
+        return self.type in drivers.StreamableTypes
+
+    @property
+    def tag(self) -> Optional[str]:
+        return self._options.get("tag")  # None when the stage carries no "tag" option
+
+    @property
+    def inputs(self) -> List[Union[Stage, str]]:
+        inputs = self._options.get("inputs", ())
+        return [inputs] if isinstance(inputs, (Stage, str)) else list(inputs)  # lone Stage/str -> 1-item list
+
+    @property
+    def options(self) -> Dict[str, Any]:
+        return dict(self._options)  # shallow copy: editing it leaves the stage's own dict alone
+
+    def pipeline(self, *arrays: np.ndarray, loglevel: int = logging.ERROR) -> Pipeline:
+        return Pipeline((self,), arrays, loglevel)  # single-stage pipeline with `arrays` as inputs
+
+    def __or__(self, other: Union[Stage, Pipeline]) -> Pipeline:
+        return Pipeline((self, other))  # `stage | x` starts a new pipeline with this stage first
+
+
+class InferableTypeStage(Stage):  # Stage whose type may be derived from its filename
+    def __init__(self, filename: Optional[str] = None, **options: Any):
+        if filename:  # a falsy filename (None, "") is not stored as an option
+            options["filename"] = filename
+        super().__init__(**options)
+
+    @property
+    def type(self) -> str:
+        try:
+            return super().type
+        except KeyError:  # no explicit "type" option: fall back to inference
+            filename = self._options.get("filename")
+            return str(self._infer_type(filename) if filename else "")
+
+    _infer_type = staticmethod(lambda filename: "")  # default hook: infers nothing
+
+
+class Reader(InferableTypeStage):  # a missing "type" is inferred from the filename by libpdalpython
+    _infer_type = staticmethod(libpdalpython.infer_reader_driver)
+
+
+class Filter(Stage):  # unlike Reader/Writer, the stage type is a required argument
+    def __init__(self, type: str, **options: Any):
+        super().__init__(type=type, **options)
+
+
+class Writer(InferableTypeStage):  # a missing "type" is inferred from the filename by libpdalpython
+    _infer_type = staticmethod(libpdalpython.infer_writer_driver)
+
+
+def _parse_stages(text: str) -> Iterator[Stage]:
+    """Yield a Reader, Filter or Writer for each element of a JSON pipeline.
+
+    Accepts a JSON list of stages, or an object holding it under "pipeline".
+    """
+    json_stages = json.loads(text)
+    if isinstance(json_stages, dict):
+        json_stages = json_stages.get("pipeline")
+    if not isinstance(json_stages, list):
+        raise ValueError("root element is not a pipeline")
+
+    last = len(json_stages) - 1
+    for i, options in enumerate(json_stages):
+        if isinstance(options, str):
+            options = {"filename": options}
+        elif not isinstance(options, dict):
+            raise ValueError("A stage element must be string or dict")
+
+        stage_type = options.get("type")
+        if stage_type and not isinstance(stage_type, str):
+            raise ValueError(f"Stage type must be a string, not {stage_type!r}")
+        # An untyped stage is inferred from its filename: a reader unless it is
+        # the last of several stages, in which case it is a writer.
+        is_reader = stage_type.startswith("readers.") if stage_type else (i == 0 or i != last)
+        if is_reader:
+            yield Reader(**options)
+        elif not stage_type or stage_type.startswith("writers."):
+            yield Writer(**options)
+        else:
+            yield Filter(**options)
+
+
+def _generate_tag(stage: Stage, tags: Container[str]) -> str:
+    """Return ``<type with dots as underscores><n>`` for the smallest n >= 1 not in ``tags``."""
+    prefix = stage.type.replace(".", "_")
+    suffix = 1
+    # Probe suffixes in increasing order until an unused tag is found.
+    while f"{prefix}{suffix}" in tags:
+        suffix += 1
+    return f"{prefix}{suffix}"
+
+
+def _get_input_tags(stage: Stage, stage2tag: Dict[Stage, str]) -> List[str]:
+    """Resolve ``stage.inputs`` to tags; a Stage input must already be in ``stage2tag``."""
+    tags = []
+    for source in stage.inputs:
+        if isinstance(source, Stage):
+            try:
+                tags.append(stage2tag[source])
+            except KeyError:
+                name = source.tag or source.type
+                raise RuntimeError(f"Invalid pipeline: Undefined stage {name!r}") from None
+        else:
+            tags.append(source)  # already a tag string
+    return tags
diff --git a/test/__init__.py b/test/__init__.py
deleted file mode 100644
index 8a3c8545..00000000
--- a/test/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import sys
-DATADIRECTORY = sys.argv.pop()
-from test.test_pipeline import test_suite
diff --git a/test/data/bad.json b/test/data/bad.json
new file mode 100644
index 00000000..13ee85cb
--- /dev/null
+++ b/test/data/bad.json
@@ -0,0 +1,9 @@
+{
+  "pipeline": [
+    "nofile.las",
+    {
+        "type": "filters.sort",
+        "dimension": "X"
+    }
+  ]
+}
diff --git a/test/data/bad.py b/test/data/bad.py
new file mode 100644
index 00000000..dfb3ec15
--- /dev/null
+++ b/test/data/bad.py
@@ -0,0 +1 @@
+Reader("nofile.las") | Filter.sort(dimension="X")
diff --git a/test/data/chip.py b/test/data/chip.py
new file mode 100644
index 00000000..6b229586
--- /dev/null
+++ b/test/data/chip.py
@@ -0,0 +1 @@
+Reader("test/data/autzen-utm.las") | Filter.chipper(capacity=25) | Writer("auzen-utm-chipped-25.las")
diff --git a/test/data/mesh.json b/test/data/mesh.json
new file mode 100644
index 00000000..e44d9356
--- /dev/null
+++ b/test/data/mesh.json
@@ -0,0 +1,5 @@
+[
+    "test/data/1.2-with-color.las",
+    {"type":  "filters.splitter", "length": 1000}, 
+    {"type":  "filters.delaunay"}
+]
\ No newline at end of file
diff --git a/test/data/mesh.py b/test/data/mesh.py
new file mode 100644
index 00000000..635feea1
--- /dev/null
+++ b/test/data/mesh.py
@@ -0,0 +1 @@
+Reader("test/data/1.2-with-color.las") | Filter.splitter(length=1000) | Filter.delaunay()
diff --git a/test/data/perlin.npy b/test/data/perlin.npy
deleted file mode 100644
index 457a1356..00000000
Binary files a/test/data/perlin.npy and /dev/null differ
diff --git a/test/data/range.json b/test/data/range.json
new file mode 100644
index 00000000..df48ac09
--- /dev/null
+++ b/test/data/range.json
@@ -0,0 +1,7 @@
+[
+  "test/data/autzen-utm.las",
+  {
+    "type": "filters.range",
+    "limits": "Intensity[80:120)"
+  }
+]
diff --git a/test/data/range.py b/test/data/range.py
new file mode 100644
index 00000000..03a8aa26
--- /dev/null
+++ b/test/data/range.py
@@ -0,0 +1 @@
+Reader("test/data/autzen-utm.las") | Filter.range(limits="Intensity[80:120)")
diff --git a/test/data/reproject.json b/test/data/reproject.json
index 87cdc1b3..d0bd3ebd 100644
--- a/test/data/reproject.json
+++ b/test/data/reproject.json
@@ -7,7 +7,7 @@
     {
       "type":"filters.python",
       "function":"filter",
-      "source":"import numpy as np\n\ndef filter(ins,outs):\n\tcls = ins['Classification']\n\n\tkeep_classes = [1]\n\n\t# Use the first test for our base array.\n\tkeep = np.equal(cls, keep_classes[0])\n\n\t# For 1:n, test each predicate and join back\n\t# to our existing predicate array\n\tfor k in range(1,len(keep_classes)):\n\t\tt = np.equal(cls, keep_classes[k])\n\t\tkeep = keep + t\n\n\touts['Mask'] = keep\n\treturn True",
+      "source":"import numpy as np\n\ndef filter(ins,outs):\n\tprint('entered filter()')\n\tcls = ins['Classification']\n\n\tkeep_classes = [1]\n\n\t# Use the first test for our base array.\n\tkeep = np.equal(cls, keep_classes[0])\n\n\t# For 1:n, test each predicate and join back\n\t# to our existing predicate array\n\tfor k in range(1,len(keep_classes)):\n\t\tt = np.equal(cls, keep_classes[k])\n\t\tkeep = keep + t\n\n\touts['Mask'] = keep\n\tprint('exiting filter()')\n\treturn True",
       "module":"anything"
     },
     "out2.las"
diff --git a/test/data/reproject.py b/test/data/reproject.py
new file mode 100644
index 00000000..e74514c0
--- /dev/null
+++ b/test/data/reproject.py
@@ -0,0 +1,28 @@
+(
+    Reader(filename="test/data/1.2-with-color.las", spatialreference="EPSG:2993")
+    |
+    Filter.python(function="filter", module="anything", source="""
+import numpy as np
+
+
+def filter(ins, outs):
+    print("entered filter()")
+    cls = ins["Classification"]
+    keep_classes = [1]
+
+    # Use the first test for our base array.
+    keep = np.equal(cls, keep_classes[0])
+
+    # For 1:n, test each predicate and join back
+    # to our existing predicate array
+    for k in range(1, len(keep_classes)):
+        t = np.equal(cls, keep_classes[k])
+        keep = keep + t
+
+    outs["Mask"] = keep
+    print("exiting filter()")
+    return True
+""")
+    |
+    Writer("out2.las")
+)
diff --git a/test/data/simple.laz b/test/data/simple.laz
new file mode 100644
index 00000000..6f774c5b
Binary files /dev/null and b/test/data/simple.laz differ
diff --git a/test/data/sort.py b/test/data/sort.py
new file mode 100644
index 00000000..8bc741a1
--- /dev/null
+++ b/test/data/sort.py
@@ -0,0 +1 @@
+Reader("test/data/1.2-with-color.las") | Filter.sort(dimension="X")
diff --git a/test/test_pio.py b/test/test_pio.py
deleted file mode 100644
index bcde6bcb..00000000
--- a/test/test_pio.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import unittest
-import json
-
-from pdal import pio
-
-dummy_pipeline = """{
-  "pipeline": [
-    {
-      "type": "readers.ply",
-      "filename": "dummyinput.ply"
-    },
-    {
-      "type": "filters.outlier",
-      "method": "statistical",
-      "mean_k": 16,
-      "multiplier": 1.0
-    },
-    {
-      "type": "filters.range",
-      "limits": "Classification![7:7]"
-    },
-    {
-      "type": "filters.normal"
-    },
-    {
-      "type": "writers.ply",
-      "storage_mode": "ascii",
-      "precision": 4,
-      "filename": "dummyoutput.ply",
-      "dims": "X,Y,Z,Red,Green,Blue,NormalX,NormalY,NormalZ"
-    }
-  ]
-}"""
-
-
-
-class TestPIOBasics(unittest.TestCase):
-    def test_pipeline_construction(self):
-        pipeline = (pio.readers.ply(filename="dummyinput.ply") +
-                    pio.filters.outlier(method="statistical", mean_k=16, multiplier=1.0) +
-                    pio.filters.range(limits="Classification![7:7]") +
-                    pio.filters.normal() + pio.writers.ply(storage_mode="ascii", precision=4, filename="dummyoutput.ply",
-                                                           dims="X,Y,Z,Red,Green,Blue,NormalX,NormalY,NormalZ"))
-
-        self.assertIsInstance(pipeline, pio.PipelineSpec)
-        self.assertEqual(len(list(pipeline.stages)), 5)
-        self.assertEqual(json.dumps(pipeline.spec, indent=2), dummy_pipeline)
-
-        auto_reader = pio.readers.auto(filename="dummyinput.las")
-        auto_writer = pio.writers.auto(filename="dummyoutput.las")
-
-        self.assertIn("filename", auto_reader.spec)
-        self.assertNotIn("type", auto_reader.spec)
-        self.assertIn("filename", auto_reader.spec)
-        self.assertNotIn("type", auto_writer.spec)
-        self.assertEqual(auto_reader.prefix, "readers")
-        self.assertEqual(auto_writer.prefix, "writers")
diff --git a/test/test_pipeline.py b/test/test_pipeline.py
index e16124d9..46e0a18d 100644
--- a/test/test_pipeline.py
+++ b/test/test_pipeline.py
@@ -1,176 +1,456 @@
-import unittest
-import pdal
+import json
+import logging
 import os
+import sys
+
+from typing import List
+from itertools import product
 import numpy as np
-from packaging.version import Version
+import pytest
+
+import pdal
+import pathlib
+
+DATADIRECTORY = os.path.join(os.path.dirname(__file__), "data")
+
 
-DATADIRECTORY = "./test/data"
+def a_filter(ins, outs):  # ignores both arguments and always reports success
+    return True
 
-bad_json = u"""
-{
-  "pipeline": [
-    "nofile.las",
-    {
-        "type": "filters.sort",
-        "dimension": "X"
-    }
-  ]
-}
-"""
 
+def compare_structured_arrays(arr1, arr2):
+    """True when every field of ``arr1`` is element-wise equal in ``arr2``."""
+    return all(
+        bool(np.all(np.equal(arr1[name], arr2[name])))
+        for name in arr1.dtype.names
+    )
 
+def get_pipeline(filename):
+    with open(os.path.join(DATADIRECTORY, filename), "r") as f:  # fixture under test/data
+        if filename.endswith(".json"):
+            return pdal.Pipeline(f.read())
+        if filename.endswith(".py"):
+            return eval(f.read(), vars(pdal))  # eval: only ever run on trusted repo fixtures
+    raise ValueError(f"unsupported pipeline fixture {filename!r}")
 
-class PDALTest(unittest.TestCase):
 
-    def fetch_json(self, filename):
-        import os
-        fn = DATADIRECTORY + os.path.sep +  filename
-        output = ''
-        with open(fn, 'rb') as f:
-            output = f.read().decode('UTF-8')
-        return output
+def test_dimensions():
+    """Ask PDAL for its valid dimensions list"""
+    dims = pdal.dimensions
+    assert len(dims) > 0
 
-class TestPipeline(PDALTest):
 
-    @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')),
-                         "missing test data")
-    def test_construction(self):
+class TestPipeline:
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_construction(self, filename):
         """Can we construct a PDAL pipeline"""
-        json = self.fetch_json('sort.json')
-        r = pdal.Pipeline(json)
+        assert isinstance(get_pipeline(filename), pdal.Pipeline)
 
-    @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')),
-                         "missing test data")
-    def test_execution(self):
+        # construct Pipeline from a sequence of stages
+        r = pdal.Reader("r")
+        f = pdal.Filter("f")
+        for spec in (r, f), [r, f]:
+            p = pdal.Pipeline(spec)
+            assert isinstance(p, pdal.Pipeline)
+            assert len(p.stages) == 2
+
+    @pytest.mark.parametrize(
+        "pipeline",
+        [
+            "{}",
+            '{"foo": []}',
+            "[1, 2]",
+            '{"pipeline": [["a.las", "b.las"], "c.las"]}',
+        ],
+    )
+    def test_invalid_json(self, pipeline):
+        """Do we complain with bad pipelines"""
+        json.loads(pipeline)
+        with pytest.raises(ValueError):
+            pdal.Pipeline(pipeline)
+
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_execute(self, filename):
         """Can we execute a PDAL pipeline"""
-        x = self.fetch_json('sort.json')
-        r = pdal.Pipeline(x)
-        r.validate()
-        r.execute()
-        self.assertGreater(len(r.pipeline), 200)
+        r = get_pipeline(filename)
+        count = r.execute()
+        assert count == 1065
+
+    @pytest.mark.parametrize("filename", ["range.json", "range.py"])
+    def test_execute_streaming(self, filename):
+        r = get_pipeline(filename)
+        assert r.streamable
+        count = r.execute()
+        count2 = r.execute_streaming(chunk_size=100)
+        assert count == count2
+
 
-    def test_validate(self):
+    @pytest.mark.parametrize("filename", ["range.json", "range.py"])
+    def test_subsetstreaming(self, filename):
+        """Can we fetch a subset of PDAL dimensions as a numpy array while streaming"""
+        r = get_pipeline(filename)
+        limit = ['X','Y','Z','Intensity']
+        arrays = list(r.iterator(chunk_size=100,allowed_dims=limit))
+        assert len(arrays) == 11
+        assert len(arrays[0].dtype) == 4
+
+
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_execute_streaming_non_streamable(self, filename):
+        r = get_pipeline(filename)
+        assert not r.streamable
+        with pytest.raises(RuntimeError) as info:
+            r.execute_streaming()
+        assert "Attempting to use stream mode" in str(info.value)
+
+    @pytest.mark.parametrize("filename", ["bad.json", "bad.py"])
+    def test_validate(self, filename):
         """Do we complain with bad pipelines"""
-        r = pdal.Pipeline(bad_json)
-        with self.assertRaises(RuntimeError):
-            r.validate()
+        r = get_pipeline(filename)
+        with pytest.raises(RuntimeError) as info:
+            r.execute()
+        if os.name == "nt":
+            assert "Unable to open stream for" in str(info.value)
+        else:
+            assert "No such file or directory" in str(info.value)
 
-    @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')),
-                         "missing test data")
-    def test_array(self):
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_array(self, filename):
         """Can we fetch PDAL data as a numpy array"""
-        json = self.fetch_json('sort.json')
-        r = pdal.Pipeline(json)
-        r.validate()
+        r = get_pipeline(filename)
         r.execute()
         arrays = r.arrays
-        self.assertEqual(len(arrays), 1)
+        assert len(arrays) == 1
 
         a = arrays[0]
-        self.assertAlmostEqual(a[0][0], 635619.85, 7)
-        self.assertAlmostEqual(a[1064][2], 456.92, 7)
+        assert a[0][0] == 635619.85
+        assert a[1064][2] == 456.92
+
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_subsetarray(self, filename):
+        """Can we fetch a subset of PDAL dimensions as a numpy array"""
+        r = get_pipeline(filename)
+        limit = ['X','Y','Z']
+        r.execute(allowed_dims=limit)
+        arrays = r.arrays
+        assert len(arrays) == 1
+        assert len(arrays[0].dtype) == 3
+
 
-    @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')),
-                         "missing test data")
-    def test_metadata(self):
+
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_metadata(self, filename):
         """Can we fetch PDAL metadata"""
-        json = self.fetch_json('sort.json')
-        r = pdal.Pipeline(json)
-        r.validate()
+        r = get_pipeline(filename)
+        with pytest.raises(RuntimeError) as info:
+            r.metadata
+        assert "Pipeline has not been executed" in str(info.value)
+
         r.execute()
-        metadata = r.metadata
-        import json
-        j = json.loads(metadata)
-        self.assertEqual(j["metadata"]["readers.las"][0]["count"], 1065)
+        assert r.metadata["metadata"]["readers.las"]["count"] == 1065
 
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_schema(self, filename):
+        """Fetching a schema works"""
+        r = get_pipeline(filename)
+        with pytest.raises(RuntimeError) as info:
+            r.schema
+        assert "Pipeline has not been executed" in str(info.value)
+
+        r.execute()
+        assert r.schema["schema"]["dimensions"][0]["name"] == "X"
 
-    @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')),
-                         "missing test data")
-    def test_no_execute(self):
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_pipeline(self, filename):
+        """Can we fetch PDAL pipeline string"""
+        r = get_pipeline(filename)
+        r.execute()
+        # filename might be an object in PDAL 2.9+
+        # https://github.com/PDAL/PDAL/issues/4751
+
+        returned = json.loads(r.pipeline)
+        expected = { "pipeline": [
+                {
+                    "filename": "test/data/1.2-with-color.las",
+                    "tag": "readers_las1",
+                    "type": "readers.las",
+                },
+                {
+                    "dimension": "X",
+                    "inputs": ["readers_las1"],
+                    "tag": "filters_sort1",
+                    "type": "filters.sort",
+                },
+            ]
+        }
+        reader_file = returned['pipeline'][0]['filename']
+        if isinstance(reader_file, dict):  # object form: compare its 'path' (a failed == never raises AttributeError)
+            reader_file = reader_file['path']
+        assert reader_file == expected['pipeline'][0]['filename']
+
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_no_execute(self, filename):
         """Does fetching arrays without executing throw an exception"""
-        json = self.fetch_json('sort.json')
-        r = pdal.Pipeline(json)
-        with self.assertRaises(RuntimeError):
+        r = get_pipeline(filename)
+        with pytest.raises(RuntimeError) as info:
             r.arrays
-#
-#    @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'reproject.json')),
-#                         "missing test data")
-#    def test_logging(self):
-#        """Can we fetch log output"""
-#        json = self.fetch_json('reproject.json')
-#        r = pdal.Pipeline(json)
-#        r.loglevel = 8
-#        r.validate()
-#        count = r.execute()
-#        self.assertEqual(count, 789)
-#        self.assertEqual(r.log.split()[0], '(pypipeline')
-#
-    @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')),
-                         "missing test data")
-    def test_schema(self):
-        """Fetching a schema works"""
-        json = self.fetch_json('sort.json')
-        r = pdal.Pipeline(json)
-        r.validate()
-        r.execute()
-        self.assertEqual(r.schema['schema']['dimensions'][0]['name'], 'X')
+        assert "Pipeline has not been executed" in str(info.value)
 
-    @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'chip.json')),
-                         "missing test data")
-    def test_merged_arrays(self):
-        """Can we fetch multiple point views from merged PDAL data """
-        json = self.fetch_json('chip.json')
-        r = pdal.Pipeline(json)
-        r.validate()
+    @pytest.mark.parametrize("filename", ["chip.json", "chip.py"])
+    def test_merged_arrays(self, filename):
+        """Can we fetch multiple point views from merged PDAL data"""
+        r = get_pipeline(filename)
         r.execute()
         arrays = r.arrays
-        self.assertEqual(len(arrays), 43)
+        assert len(arrays) == 43
+
+    @pytest.mark.parametrize("filename", ["chip.json", "chip.py"])
+    def test_stages(self, filename):
+        """Can we break up a pipeline as a sequence of stages"""
+        stages = pdal.Reader("test/data/autzen-utm.las").pipeline().stages
+        assert len(stages) == 1
+
+        stages = get_pipeline(filename).stages
+        assert len(stages) == 3
+
+        assert isinstance(stages[0], pdal.Reader)
+        assert stages[0].type == "readers.las"
+
+        assert isinstance(stages[1], pdal.Filter)
+        assert stages[1].type == "filters.chipper"
+
+        assert isinstance(stages[2], pdal.Writer)
+        assert stages[2].type == "writers.las"
+
+    def test_pipe_stages(self):
+        """Can we build a pipeline by piping stages together"""
+        read = pdal.Reader("test/data/autzen-utm.las")
+        frange = pdal.Filter.range(limits="Intensity[50:200)")
+        fsplitter = pdal.Filter.splitter(length=1000)
+        fdelaunay = pdal.Filter.delaunay(inputs=[frange, fsplitter])
+
+        # pipe stages together
+        pipeline = read | frange | fsplitter | fdelaunay
+        pipeline.execute()
+
+        # pipe a pipeline to a stage
+        pipeline = read | (frange | fsplitter | fdelaunay)
+        pipeline.execute()
+
+        # pipe a pipeline to a pipeline
+        pipeline = (read | frange) | (fsplitter | fdelaunay)
+        pipeline.execute()
+
+    def test_pipe_stage_errors(self):
+        """Do we complain with piping invalid objects"""
+        r = pdal.Reader("r", tag="r")
+        f = pdal.Filter("f")
+        w = pdal.Writer("w", inputs=["r", f])
+
+        with pytest.raises(TypeError):
+            r | (f, w)
+        with pytest.raises(TypeError):
+            (r, f) | w
+        with pytest.raises(TypeError):
+            (r, f) | (f, w)
+
+        pipeline = r | w
+        with pytest.raises(RuntimeError) as info:
+            pipeline.execute()
+        assert "Undefined stage 'f'" in str(info.value)
 
+    def test_inputs(self):
+        """Can we combine pipelines with inputs"""
+        data = np.load(os.path.join(DATADIRECTORY, "test3d.npy"))
+        f = pdal.Filter.splitter(length=1000)
+        pipeline = f.pipeline(data)
+        pipeline.execute()
 
-class TestArrayLoad(PDALTest):
+        # a pipeline with inputs can be followed by stage/pipeline
+        (pipeline | pdal.Writer.null()).execute()
+        (pipeline | (f | pdal.Writer.null())).execute()
 
-    @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'perlin.npy')),
-            "missing test data")
+        # a pipeline with inputs cannot follow another stage/pipeline
+        with pytest.raises(ValueError):
+            pdal.Reader("r") | pipeline
+        with pytest.raises(ValueError):
+            (pdal.Reader("r") | f) | pipeline
+
+    def test_infer_stage_type(self):
+        """Can we infer stage type from the filename"""
+        assert pdal.Reader("foo.las").type == "readers.las"
+        assert pdal.Writer("foo.las").type == "writers.las"
+        assert pdal.Reader("foo.xxx").type == ""
+        assert pdal.Writer("foo.xxx").type == ""
+        assert pdal.Reader().type == ""
+        assert pdal.Writer().type == ""
+
+    def test_streamable(self):
+        """Can we distinguish streamable from non-streamable stages and pipeline"""
+        rs = pdal.Reader(type="readers.las", filename="foo")
+        assert rs.streamable is True
+        assert pdal.Reader.las("foo").streamable is True
+        assert pdal.Reader("foo.las").streamable is True
+
+        rn = pdal.Reader(type="readers.pts", filename="foo")
+        assert rn.streamable is False
+        assert pdal.Reader.pts("foo").streamable is False
+        assert pdal.Reader("foo.pts").streamable is False
+
+        fs = pdal.Filter(type="filters.crop")
+        assert fs.streamable is True
+        assert pdal.Filter.crop().streamable is True
+
+        fn = pdal.Filter(type="filters.cluster")
+        assert fn.streamable is False
+        assert pdal.Filter.cluster().streamable is False
+
+        ws = pdal.Writer(type="writers.ogr", filename="foo")
+        assert ws.streamable is True
+        assert pdal.Writer.ogr(filename="foo").streamable is True
+        assert pdal.Writer("foo.shp").streamable is True
+
+        wn = pdal.Writer(type="writers.gltf", filename="foo")  # same driver as the gltf()/".glb" checks below
+        assert wn.streamable is False
+        assert pdal.Writer.gltf("foo").streamable is False
+        assert pdal.Writer("foo.glb").streamable is False
+
+        assert (rs | fs | ws).streamable is True
+        assert (rn | fs | ws).streamable is False
+        assert (rs | fn | ws).streamable is False
+        assert (rs | fs | wn).streamable is False
+
+    @pytest.mark.parametrize("filename", ["chip.json", "chip.py"])
+    def test_logging(self, filename):
+        """Can we fetch log output"""
+        r = get_pipeline(filename)
+        assert r.loglevel == logging.ERROR
+        assert r.log == ""
+
+        for loglevel in logging.CRITICAL, -1:
+            with pytest.raises(ValueError):
+                r.loglevel = loglevel
+
+        count = r.execute()
+        assert count == 1065
+        assert r.log == ""
+
+        r.loglevel = logging.DEBUG
+        assert r.loglevel == logging.DEBUG
+        count = r.execute()
+        assert count == 1065
+        assert "(pypipeline readers.las Debug)" in r.log
+        assert "(pypipeline Debug) Executing pipeline in standard mode" in r.log
+        assert "(pypipeline writers.las Debug)" in r.log
+
+    @pytest.mark.skipif(
+        not hasattr(pdal.Filter, "python"),
+        reason="filters.python PDAL plugin is not available",
+    )
+    @pytest.mark.parametrize("filename", ["reproject.json", "reproject.py"])
+    def test_logging_filters_python(self, filename):
+        """Can we fetch log output including print() statements from filters.python"""
+        r = get_pipeline(filename)
+        assert r.loglevel == logging.ERROR
+        assert r.log == ""
+
+        for loglevel in logging.CRITICAL, -1:
+            with pytest.raises(ValueError):
+                r.loglevel = loglevel
+
+        count = r.execute()
+        assert count == 789
+        assert r.log == "entered filter()\n" + "exiting filter()\n"
+
+        r.loglevel = logging.DEBUG
+        assert r.loglevel == logging.DEBUG
+        count = r.execute()
+        assert count == 789
+        assert "(pypipeline readers.las Debug)" in r.log
+        assert "(pypipeline filters.python Debug)" in r.log
+        assert "\nentered filter()\n" in r.log
+        assert "\nexiting filter()\n" in r.log
+        assert "(pypipeline writers.las Debug)" in r.log
+
+    @pytest.mark.skipif(
+        not hasattr(pdal.Filter, "python"),
+        reason="filters.python PDAL plugin is not available",
+    )
+    def test_filters_python(self):
+        r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
+        f = pdal.Filter.python(script=__file__, function="a_filter", module="anything")
+        count = (r | f).execute()
+        assert count == 1065
+
+    def test_only_readers(self):
+        """Does a pipeline that consists of only readers return the merged data"""
+        read = pdal.Reader("test/data/*.las")
+        r1 = read.pipeline()
+        count1 = r1.execute()
+        array1 = r1.arrays[0]
+
+        r2 = read | read
+        count2 = r2.execute()
+        array2 = r2.arrays[0]
+
+        assert count2 == 2 * count1
+        np.testing.assert_array_equal(np.concatenate([array1, array1]), array2)
+
+    def test_quickinfo(self):
+        """Does quickinfo report readers.las and its point count for a LAS file"""
+        reader = pdal.Reader(os.path.join(DATADIRECTORY, "autzen-utm.las"))
+        info = reader.pipeline().quickinfo
+        assert "readers.las" in info.keys()
+        assert info["readers.las"]["num_points"] == 1065
+
+    def test_quickinfo_offsets_scales(self):
+        """Does quickinfo metadata for a LAZ file include the X offset and scale"""
+        reader = pdal.Reader(os.path.join(DATADIRECTORY, "simple.laz"))
+        las_info = reader.pipeline().quickinfo
+        assert "readers.las" in las_info.keys()
+        assert "offset_x" in las_info["readers.las"]["metadata"].keys()
+        assert "scale_x" in las_info["readers.las"]["metadata"].keys()
+        assert las_info["readers.las"]["num_points"] == 1065
+
+    def test_jsonkwarg(self):
+        pipeline = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las")).pipeline().toJSON()
+        r = pdal.Pipeline(json=pipeline)
+        p = r.pipeline
+        assert 'readers.las' in p
+
+
+
+class TestArrayLoad:
     def test_merged_arrays(self):
         """Can we load data from a list of arrays to PDAL"""
-        if Version(pdal.info.version) < Version('1.8'):
-            return True
-        data = np.load(os.path.join(DATADIRECTORY, 'test3d.npy'))
-
+        data = np.load(os.path.join(DATADIRECTORY, "test3d.npy"))
         arrays = [data, data, data]
-
-        json = self.fetch_json('chip.json')
-        chip =u"""{
-  "pipeline":[
-    {
-      "type":"filters.range",
-      "limits":"Intensity[100:300)"
-    }
-  ]
-}"""
-
-        p = pdal.Pipeline(chip, arrays)
-        p.loglevel = 8
-        count = p.execute()
+        filter_intensity = """{
+          "pipeline":[
+            {
+              "type":"filters.range",
+              "limits":"Intensity[100:300)"
+            }
+          ]
+        }"""
+        p = pdal.Pipeline(filter_intensity, arrays)
+        p.execute()
         arrays = p.arrays
-        self.assertEqual(len(arrays), 3)
+        assert len(arrays) == 3
 
         for data in arrays:
-            self.assertEqual(len(data), 12)
-            self.assertEqual(data['Intensity'].sum(), 1926)
+            assert len(data) == 12
+            assert data["Intensity"].sum() == 1926
 
     def test_read_arrays(self):
         """Can we read and filter data from a list of arrays to PDAL"""
-        if Version(pdal.info.version) < Version('1.8'):
-            return True
-
         # just some dummy data
         x_vals = [1.0, 2.0, 3.0, 4.0, 5.0]
         y_vals = [6.0, 7.0, 8.0, 9.0, 10.0]
         z_vals = [1.5, 3.5, 5.5, 7.5, 9.5]
         test_data = np.array(
             [(x, y, z) for x, y, z in zip(x_vals, y_vals, z_vals)],
-            dtype=[('X', np.float), ('Y', np.float), ('Z', np.float)]
+            dtype=[("X", float), ("Y", float), ("Z", float)],
         )
 
         pipeline = """
@@ -183,25 +463,432 @@ def test_read_arrays(self):
             ]
         }
         """
+        p = pdal.Pipeline(pipeline, arrays=[test_data])
+        count = p.execute()
+        arrays = p.arrays
+        assert count == 2
+        assert len(arrays) == 1
+
+    def test_reference_counting(self):
+        """Does an array fetched from an executed pipeline have a refcount of 1"""
+        # just some dummy data
+        x_vals = [1.0, 2.0, 3.0, 4.0, 5.0]
+        y_vals = [6.0, 7.0, 8.0, 9.0, 10.0]
+        z_vals = [1.5, 3.5, 5.5, 7.5, 9.5]
+        test_data = np.array(
+            [(x, y, z) for x, y, z in zip(x_vals, y_vals, z_vals)],
+            dtype=[("X", float), ("Y", float), ("Z", float)],
+        )
 
-        p = pdal.Pipeline(pipeline, arrays=[test_data,])
-        p.loglevel = 8
+        pipeline = """
+        {
+            "pipeline": [
+                {
+                    "type":"filters.range",
+                    "limits":"X[2.5:4.5]"
+                }
+            ]
+        }
+        """
+        p = pdal.Pipeline(pipeline, arrays=[test_data])
         count = p.execute()
+        assert count == 2
+        refcount = sys.getrefcount(p.arrays[0])
+        assert refcount == 1
+
+
+
+class TestMesh:
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_no_execute(self, filename):
+        """Does fetching meshes without executing throw an exception"""
+        r = get_pipeline(filename)
+        with pytest.raises(RuntimeError) as info:
+            r.meshes
+        assert "Pipeline has not been executed" in str(info.value)
+
+    @pytest.mark.parametrize("filename", ["mesh.json", "mesh.py"])
+    def test_mesh(self, filename):
+        """Can we fetch PDAL face data as a numpy array"""
+        r = get_pipeline(filename)
+        r.execute()
+        meshes = r.meshes
+        assert len(meshes) == 24
+
+        m = meshes[0]
+        assert str(m.dtype) == "[('A', '<u4'), ('B', '<u4'), ('C', '<u4')]"
+        assert len(m) == 134
+        assert m[0][0] == 29
+
+    @pytest.mark.parametrize("filename", ["mesh.json", "mesh.py"])
+    def test_meshio(self, filename):
+        r = get_pipeline(filename)
+        r.execute()
+        mesh = r.get_meshio(0)
+        triangles = mesh.cells_dict["triangle"]
+        assert len(triangles) == 134
+        assert triangles[0][0] == 29
+
+    def test_pathlib(self):
+        """Can we build a pipeline using pathlib.Path as the filenames"""
+        path = pathlib.Path("test/data/autzen-utm.las")
+        read = pdal.Reader(path)
+        pipeline = read.pipeline()
+        pipeline.execute()
+
+
+class TestDataFrame:
+
+    @pytest.mark.skipif(
+        not pdal.pipeline.DataFrame,
+        reason="pandas is not available",
+    )
+    def test_fetch(self):
+        r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
+        p = r.pipeline()
+        p.execute()
+        df = p.get_dataframe(0)
+        assert len(df) == 1065
+        assert len(df.columns) == 20
+
+    @pytest.mark.skipif(
+        not pdal.pipeline.DataFrame,
+        reason="pandas is not available",
+    )
+    def test_load(self):
+        r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
+        p = r.pipeline()
+        p.execute()
+        data = p.arrays[0]
+        df = pdal.pipeline.DataFrame
+        dataframes = [df(data), df(data), df(data)]
+        filter_intensity = """{
+          "pipeline":[
+            {
+              "type":"filters.range",
+              "limits":"Intensity[100:300)"
+            }
+          ]
+        }"""
+        p = pdal.Pipeline(filter_intensity, dataframes = dataframes)
+        p.execute()
         arrays = p.arrays
-        self.assertEqual(count, 2)
-        self.assertEqual(len(arrays), 1)
+        assert len(arrays) == 3
+
+        # We copied the array three times. Sum the Intensity values
+        # post filtering to see if we had our intended effect
+        for data in arrays:
+            assert len(data) == 387
+            assert data["Intensity"].sum() == 57684
+
+
+class TestGeoDataFrame:
+
+    @pytest.mark.skipif(
+        not pdal.pipeline.GeoDataFrame,
+        reason="geopandas is not available",
+    )
+    def test_fetch(self):
+        r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
+        p = r.pipeline()
+        p.execute()
+        record_count = p.arrays[0].shape[0]
+        dimension_count = len(p.arrays[0].dtype)
+        gdf = p.get_geodataframe(0)
+        gdf_xyz = p.get_geodataframe(0, xyz=True)
+        gdf_crs = p.get_geodataframe(0, crs="EPSG:4326")
+        assert len(gdf) == record_count
+        assert len(gdf.columns) == dimension_count + 1
+        assert isinstance(gdf, pdal.pipeline.GeoDataFrame)
+        assert gdf.geometry.is_valid.all()
+        assert not gdf.geometry.is_empty.any()
+        assert gdf.crs is None
+        assert gdf.geometry.z.isna().all()
+        assert not gdf_xyz.geometry.z.isna().any()
+        assert gdf_crs.crs.srs == "EPSG:4326"
+
+    @pytest.mark.skipif(
+        not pdal.pipeline.GeoDataFrame,
+        reason="geopandas is not available",
+    )
+    def test_load(self):
+        r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
+        p = r.pipeline()
+        p.execute()
+        data = p.arrays[0]
+        gdf = pdal.pipeline.GeoDataFrame(
+            data,
+            geometry=pdal.pipeline.points_from_xy(data["X"], data["Y"], data["Z"])
+        )
+        dataframes = [gdf, gdf, gdf]
+        filter_intensity = """{
+          "pipeline":[
+            {
+              "type":"filters.range",
+              "limits":"Intensity[100:300)"
+            }
+          ]
+        }"""
+        p = pdal.Pipeline(filter_intensity, dataframes = dataframes)
+        p.execute()
+        arrays = p.arrays
+        assert len(arrays) == 3
+
+        # We copied the array three times. Sum the Intensity values
+        # post filtering to see if we had our intended effect
+        for data in arrays:
+            assert len(data) == 387
+            assert data["Intensity"].sum() == 57684
+
+
+class TestPipelineIterator:
+    @pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
+    def test_non_streamable(self, filename):
+        r = get_pipeline(filename)
+        assert not r.streamable
+        with pytest.raises(RuntimeError) as info:
+            next(r.iterator(chunk_size=100))
+        assert "Attempting to use stream mode" in str(info.value)
+
+    @pytest.mark.parametrize("filename", ["range.json", "range.py"])
+    def test_array(self, filename):
+        """Do streamed chunks concatenate to the array from a standard execute"""
+        r = get_pipeline(filename)
+        count = r.execute()
+        arrays = r.arrays
+        assert len(arrays) == 1
+        array = arrays[0]
+        assert count == len(array)
+
+        for _ in range(10):
+            arrays = list(r.iterator(chunk_size=100))
+            assert len(arrays) == 11
+            concat_array = np.concatenate(arrays)
+            assert compare_structured_arrays(concat_array, array)
+
+    @pytest.mark.parametrize("filename", ["range.json", "range.py"])
+    def test_StopIteration(self, filename):
+        """Is StopIteration raised when the iterator is exhausted"""
+        r = get_pipeline(filename)
+        it = r.iterator(chunk_size=100)
+        for array in it:
+            assert isinstance(array, np.ndarray)
+        with pytest.raises(StopIteration):
+            next(it)
+        assert next(it, None) is None
+
+    @pytest.mark.parametrize("filename", ["range.json", "range.py"])
+    def test_metadata(self, filename):
+        """Can we fetch PDAL metadata"""
+        r = get_pipeline(filename)
+        r.execute()
+
+        it = r.iterator(chunk_size=100)
+        for _ in it:
+            pass
+
+        assert r.metadata == it.metadata
+
+    @pytest.mark.parametrize("filename", ["range.json", "range.py"])
+    def test_schema(self, filename):
+        """Fetching a schema works"""
+        r = get_pipeline(filename)
+        r.execute()
+
+        it = r.iterator(chunk_size=100)
+        for _ in it:
+            pass
 
+        assert r.schema == it.schema
 
-class TestDimensions(PDALTest):
-    def test_fetch_dimensions(self):
-        """Ask PDAL for its valid dimensions list"""
-        dims = pdal.dimensions
-        self.assertLess(len(dims), 100)
-        self.assertGreater(len(dims), 71)
+    def test_merged_arrays(self):
+        """Can we load data from a list of arrays to PDAL"""
+        data = np.load(os.path.join(DATADIRECTORY, "test3d.npy"))
+        arrays = [data, data, data]
+        filter_intensity = """{
+          "pipeline":[
+            {
+              "type":"filters.range",
+              "limits":"Intensity[100:300)"
+            }
+          ]
+        }"""
+        p = pdal.Pipeline(filter_intensity, arrays)
+        p.execute()
+        non_streaming_array = np.concatenate(p.arrays)
+        for chunk_size in range(5, 100, 5):
+            streaming_arrays = list(p.iterator(chunk_size=chunk_size))
+            assert compare_structured_arrays(np.concatenate(streaming_arrays), non_streaming_array)
+
+    @pytest.mark.parametrize("filename", ["range.json", "range.py"])
+    def test_premature_exit(self, filename):
+        """Can we stop iterating before all arrays are fetched"""
+        r = get_pipeline(filename)
+        r.execute()
+        assert len(r.arrays) == 1
+        array = r.arrays[0]
+
+        # the dtype ordering of these arrays are not going to be the
+        # same. Use our testing method to compare them.
+
+        for _ in range(10):
+            for array2 in r.iterator(chunk_size=100):
+                assert compare_structured_arrays(array2, array[: len(array2)])
+                break
+
+    @pytest.mark.parametrize("filename", ["range.json", "range.py"])
+    def test_multiple_iterators(self, filename):
+        """Can we create multiple independent iterators"""
+        r = get_pipeline(filename)
+        it1 = r.iterator(chunk_size=100)
+        it2 = r.iterator(chunk_size=100)
+        for a1, a2 in zip(it1, it2):
+            np.testing.assert_array_equal(a1, a2)
+        assert next(it1, None) is None
+        assert next(it2, None) is None
+
+
+def gen_chunk(count, random_seed = 12345):
+    """Return `count` dummy X/Y/Z points from a RandomState seeded with count*random_seed."""
+    rng = np.random.RandomState(count*random_seed)
+    result = np.zeros(count, dtype=[("X", float), ("Y", float), ("Z", float)])
+    # Draw order X, Y, Z is kept so the generated values stay reproducible
+    for dim, low in (("X", -2), ("Y", 1), ("Z", 3)):
+        result[dim][:] = rng.uniform(low, low + 1, count)
+    return result
+
+
+class TestPipelineInputStreams():
+
+    # Test cases
+    ONE_ARRAY_FULL = [[gen_chunk(1234)]]
+    MULTI_ARRAYS_FULL = [*ONE_ARRAY_FULL, [gen_chunk(4321)]]
+
+    ONE_ARRAY_STREAMED = [[gen_chunk(10), gen_chunk(7), gen_chunk(3), gen_chunk(5), gen_chunk(1)]]
+    MULTI_ARRAYS_STREAMED = [*ONE_ARRAY_STREAMED, [gen_chunk(5), gen_chunk(2), gen_chunk(3), gen_chunk(1)]]
+
+    MULTI_ARRAYS_MIXED = [
+        *MULTI_ARRAYS_STREAMED,
+        *MULTI_ARRAYS_FULL
+    ]
+
+    @pytest.mark.parametrize("in_arrays_chunks, use_setter", [
+        (arrays_chunks, use_setter) for arrays_chunks, use_setter in product([
+            ONE_ARRAY_FULL, MULTI_ARRAYS_FULL,
+            ONE_ARRAY_STREAMED, MULTI_ARRAYS_STREAMED,
+            MULTI_ARRAYS_MIXED
+        ], [False, True])  # booleans, not strings: 'False' is truthy
+    ])
+    def test_pipeline_run(self, in_arrays_chunks, use_setter):
+        """
+        Test case to validate possible usages:
+        - Combining "full" arrays and "streamed" ones
+        - Setting input arrays through the Pipeline constructor or the setter
+        """
+        # Assuming stream mode for lists that contain more than one chunk.
+        # And that first chunk is the biggest of all, to simplify input buffer size creation.
+        in_arrays = [
+            np.zeros(chunks[0].shape, chunks[0].dtype) if len(chunks) > 1 else chunks[0]
+            for chunks in in_arrays_chunks
+        ]
+
+        def get_stream_handler(in_array, in_array_chunks):
+            in_array_chunks_it = iter(in_array_chunks)
+            def load_next_chunk():
+                try:
+                    next_chunk = next(in_array_chunks_it)
+                except StopIteration:
+                    return 0
+
+                chunk_size = next_chunk.size
+                in_array[:chunk_size]["X"] = next_chunk[:]["X"]
+                in_array[:chunk_size]["Y"] = next_chunk[:]["Y"]
+                in_array[:chunk_size]["Z"] = next_chunk[:]["Z"]
+
+                return chunk_size
+
+            return load_next_chunk
+
+        stream_handlers = [
+            get_stream_handler(arr, chunks) if len(chunks) > 1 else None
+            for arr, chunks in zip(in_arrays, in_arrays_chunks)
+        ]
+
+        expected_count = sum([sum([len(c) for c in chunks]) for chunks in in_arrays_chunks])
+
+        pipeline = """
+        {
+            "pipeline": [{
+                "type": "filters.stats"
+            }]
+        }
+        """
+        if use_setter:
+            p = pdal.Pipeline(pipeline)
+            p.inputs = [(a, h) for a, h in zip(in_arrays, stream_handlers)]
+        else:
+            p = pdal.Pipeline(pipeline, arrays=in_arrays, stream_handlers=stream_handlers)
+
+        count = p.execute()
+        out_arrays = p.arrays
+        assert count == expected_count
+        assert len(out_arrays) == len(in_arrays)
+
+        for in_array_chunks, out_array in zip(in_arrays_chunks, out_arrays):
+            np.testing.assert_array_equal(out_array, np.concatenate(in_array_chunks))
+
+    @pytest.mark.parametrize("in_arrays, use_setter", list(product(
+        [[c[0] for c in ONE_ARRAY_FULL], [c[0] for c in MULTI_ARRAYS_FULL]],
+        # real booleans: the strings 'False'/'True' are both truthy
+        [False, True],
+    )))
+    def test_pipeline_run_backward_compat(self, in_arrays, use_setter: bool):
+        """Can plain arrays (no stream handlers) be passed via constructor or setter"""
+        expected_count = sum([len(a) for a in in_arrays])
+
+        pipeline = """
+        {
+            "pipeline": [{
+                "type": "filters.stats"
+            }]
+        }
+        """
+        if use_setter:
+            p = pdal.Pipeline(pipeline)
+            p.inputs = in_arrays
+        else:
+            p = pdal.Pipeline(pipeline, arrays=in_arrays)
+
+        count = p.execute()
+        out_arrays = p.arrays
+        assert count == expected_count
+        assert len(out_arrays) == len(in_arrays)
+
+        for in_array, out_array in zip(in_arrays, out_arrays):
+            np.testing.assert_array_equal(out_array, in_array)
+
+    @pytest.mark.parametrize("in_array, invalid_chunk_size", [
+        (in_array, invalid_chunk_size) for in_array, invalid_chunk_size in product(
+            [gen_chunk(1234)],
+            [-1, 12345])
+    ])
+    def test_pipeline_fail_with_invalid_chunk_size(self, in_array, invalid_chunk_size):
+        """
+        Ensure execution fails when using an invalid stream handler:
+        - One that returns a negative chunk size
+        - One that returns a chunk size bigger than the buffer capacity
+        """
+        was_called = False
+        def invalid_stream_handler():
+            nonlocal was_called
+            if was_called:
+                # avoid infinite loop
+                raise ValueError("Invalid handler should not have been called a second time")
+            was_called = True
+            return invalid_chunk_size
 
-def test_suite():
-    return unittest.TestSuite(
-        [TestPipeline])
+        p = pdal.Pipeline(arrays=[in_array], stream_handlers=[invalid_stream_handler])
+        with pytest.raises(RuntimeError,
+                           match=f"Stream chunk size not in the range of array length: {invalid_chunk_size}"):
+            p.execute()
 
-if __name__ == '__main__':
-    unittest.main()