diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index 0effb3f8..00000000 --- a/.appveyor.yml +++ /dev/null @@ -1,33 +0,0 @@ -version: 1.0.{build} - -os: Visual Studio 2015 - -platform: x64 - - -matrix: - fast_finish: true - -# Should speed up repository cloning -# -shallow_clone: true -clone_depth: 5 - -#init: -# - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) - -#on_finish: -# - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) - -cache: - C:\Miniconda3-x64\pkgs -> appveyor.yml - -install: - - call scripts\\appveyor\\config.cmd - -build_script: - - call scripts\\appveyor\\build.cmd - -test_script: - - call scripts\\appveyor\\test.cmd - - diff --git a/.github/environment.yml b/.github/environment.yml new file mode 100644 index 00000000..d18b1d3a --- /dev/null +++ b/.github/environment.yml @@ -0,0 +1,12 @@ +name: testenv +channels: + - conda-forge +dependencies: + - scikit-build-core + - numpy + - compilers + - pybind11 + - libpdal-core + - pytest + - meshio + - geopandas diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 00000000..fc69ec07 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,78 @@ +name: Build + +on: + pull_request: + branches: + - '*' + push: + branches: + - '*' + release: + types: + - published + +defaults: + run: + shell: bash -l -eo pipefail {0} + +jobs: + build: + name: ${{ matrix.os }} py${{ matrix.python-version }} numpy ${{ matrix.numpy-version }} + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: true + matrix: + os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] + python-version: ['3.10', '3.11', '3.12', '3.13'] + numpy-version: ['1.24', '2.1'] + exclude: + - python-version: '3.12' + numpy-version: '1.24' + - python-version: '3.13' + numpy-version: '1.24' + + steps: + - name: 
Check out python-pdal + uses: actions/checkout@v4 + + - name: Check out python-pdal-plugins + uses: actions/checkout@v4 + with: + repository: PDAL/python-plugins + path: ./plugins + ref: main + + - name: Setup micromamba + uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + python-version: ${{ matrix.python-version }} + use-mamba: true + auto-update-conda: true + environment-file: .github/environment.yml + + - name: Install numpy ${{ matrix.numpy-version }} + run: | + mamba install -y numpy=${{ matrix.numpy-version }} + + - name: Install python-pdal + run: | + pip install -vv . --no-deps --no-build-isolation + + - name: Install python-pdal-plugins + working-directory: ./plugins + run: pip install -vv . --no-deps --no-build-isolation + + - name: Test + run: | + export PDAL_DRIVER_PATH=$(python -m pdal --pdal-driver-path) + export PDAL_PLUGIN_PATH=$(python -m pdal --pdal-plugin-path) + echo "PDAL_DRIVER_PATH $PDAL_DRIVER_PATH" + echo "PDAL_PLUGIN_PATH $PDAL_PLUGIN_PATH" + export PDAL_DRIVER_PATH=$PDAL_PLUGIN_PATH:$PDAL_DRIVER_PATH + python -m pdal + pdal --drivers --debug + python -m pytest -v test/ + diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..99b22df6 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,62 @@ +name: Release + +on: + workflow_dispatch: + pull_request: + paths: + - '.github/workflows/release.yml' + push: + branches: + - main + paths: + - '.github/workflows/release.yml' + release: + types: + - published + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} + cancel-in-progress: true + +jobs: + + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + environment: + name: release + url: https://pypi.org/p/pdal-plugins + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + strategy: + fail-fast: true + + steps: + - uses: 
actions/checkout@v4 + + - name: Setup environment + uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-variant: Miniforge3 + miniforge-version: latest + python-version: '3.12' + use-mamba: true + auto-update-conda: true + environment-file: .github/environment.yml + + - name: Install dependencies + shell: bash -l {0} + run: | + python -m pip install build pipx twine + pipx run build --sdist -Ccmake.define.CMAKE_BUILD_WITH_INSTALL_RPATH=ON + + - uses: actions/upload-artifact@v4 + with: + name: cibw-sdist + path: dist/*.tar.gz + + - name: Publish package distributions to PyPI + if: github.event_name == 'release' && github.event.action == 'published' + uses: pypa/gh-action-pypi-publish@release/v1 + + diff --git a/.gitignore b/.gitignore index 42f64d0d..4cb02b15 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,11 @@ -pdal/libpdalpython.cpp *.pyc +_skbuild/* +.vscode/* __pycache__ build/* PDAL.egg-info/* dist/* +*.o +*.so +*.dylib +.DS_Store diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 5bc813b1..00000000 --- a/.travis.yml +++ /dev/null @@ -1,13 +0,0 @@ -# .travis.yml -# Configure Travis CI service for http://github.com/PDAL/python - -sudo: required - -services: docker - -before_install: - - docker pull pdal/pdal:latest - -script: - - docker run -v $TRAVIS_BUILD_DIR:/pdal -t pdal/pdal:latest /bin/sh -c "/pdal/scripts/travis/script.sh" - diff --git a/CHANGES.txt b/CHANGES.txt index b278b425..7e38fb77 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,12 +1,112 @@ Changes -================================================================================ +-------------------------------------------------------------------------------- + + +3.2.3 +................................................................................ + +Do not build and include wheels in distro + + +3.2.2 +................................................................................ 
+ +* Implement move ctor to satisfy MSVC 2019 https://github.com/PDAL/python/commit/667f56bd0ee465f55a14636986e80b0a9cefcf14 + + +3.2.1 +................................................................................ + +* implement #129, add pandas DataFrame i/o for convenience by @hobu in + https://github.com/PDAL/python/pull/130 +* harden getMetadata and related calls from getting non-utf-8 'json' by @hobu + in https://github.com/PDAL/python/pull/140 +* ignore DataFrame test if not GeoPandas, give up on Python 3.7 builds by @hobu + in https://github.com/PDAL/python/pull/137 + +3.2.0 +................................................................................ + +* PDAL base library 2.4.0+ is required + +* CMake project name updated to pdal-python + +* `srswkt2` property added to allow fetching of SRS info + +* pip builds require cmake >= 3.11 + +* CMAKE_CXX_STANDARD set to c++17 to match PDAL 2.4.x + +* Driver and options *actually* uses the library instead of + shelling out to `pdal` application :) + +* _get_json renamed to toJSON and made public + +* Fix #119, 'json' optional kwarg put back for now + +* DEVELOPMENT_COMPONENT in CMake FindPython skipped on OSX + +* Make sure 'type' gets set when serializing to JSON + +3.1.0 +................................................................................ + +* **Breaking change** – pipeline.metadata now returns a dictionary from + json.loads instead of a string. + +* pipeline.quickinfo will fetch the PDAL preview() information for a data source. + You can use this to fetch header or other information without reading data. 
+ https://github.com/PDAL/python/pull/109 + +* PDAL driver and option collection now uses the PDAL library directly rather + than shelling out to the pdal command https://github.com/PDAL/python/pull/107 + +* Pipelines now support pickling for use with things like Dask + https://github.com/PDAL/python/pull/110 + + + +3.0.0 +................................................................................ + +* Pythonic pipeline creation https://github.com/PDAL/python/pull/91 + +* Support streaming pipeline execution https://github.com/PDAL/python/pull/94 + +* Replace Cython with PyBind11 https://github.com/PDAL/python/pull/102 + +* Remove pdal.pio module https://github.com/PDAL/python/pull/101 + +* Move readers.numpy and filters.python to separate repository https://github.com/PDAL/python/pull/104 + +* Miscellaneous refactorings and cleanups + +2.3.5 +................................................................................ + +* Fix memory leak https://github.com/PDAL/python/pull/74 + +* Handle metadata with invalid unicode by erroring https://github.com/PDAL/python/pull/74 + +2.3.0 +................................................................................ + +* PDAL Python support 2.3.0 requires PDAL 2.1+. Older PDAL base libraries + likely will not work. + +* Python support built using scikit-build + +* readers.numpy and filters.python are installed along with the extension. + +* Pipeline can take in a list of arrays that are passed to readers.numpy + +* readers.numpy now supports functions that return arrays. See + https://pdal.io/stages/readers.numpy.html for more detail. 2.0.0 --------------------------------------------------------------------------------- +................................................................................ * PDAL Python extension is now in its own repository on its own release schedule at https://github.com/PDAL/python * Extension now builds and works under PDAL OSGeo4W64 on Windows. 
- - diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..61610cfb --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,41 @@ +cmake_minimum_required(VERSION 3.16.0) +project(pdal-python VERSION ${SKBUILD_PROJECT_VERSION} + DESCRIPTION "PDAL Python bindings" + HOMEPAGE_URL "https://github.com/PDAL/Python") + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_EXTENSIONS OFF) + +# Python-finding settings +set(Python3_FIND_STRATEGY "LOCATION") +set(Python3_FIND_REGISTRY "LAST") +set(Python3_FIND_FRAMEWORK "LAST") + +# Development vs. Development.Module +# https://cmake.org/cmake/help/latest/module/FindPython3.html?highlight=Development.Module +if (${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.18.0" AND NOT APPLE) + set(DEVELOPMENT_COMPONENT "Development.Module") +else() + set(DEVELOPMENT_COMPONENT "Development") +endif() + +# find Python3 +find_package(Python3 COMPONENTS Interpreter ${DEVELOPMENT_COMPONENT} NumPy REQUIRED) + +# find PDAL. Require 2.7+ +find_package(PDAL 2.7 REQUIRED) + +# find PyBind11 +find_package(pybind11 REQUIRED) + +set(extension "libpdalpython") +pybind11_add_module(${extension} MODULE + src/pdal/PyArray.cpp + src/pdal/PyPipeline.cpp + src/pdal/StreamableExecutor.cpp + src/pdal/libpdalpython.cpp +) +target_include_directories(${extension} PRIVATE ${Python3_NumPy_INCLUDE_DIRS}) +target_link_libraries(${extension} PRIVATE ${PDAL_LIBRARIES}) +install(TARGETS ${extension} LIBRARY DESTINATION "pdal") diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 57a62d95..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,6 +0,0 @@ -exclude *.txt -exclude MANIFEST.in -include CHANGES.txt README.rst -recursive-include test *.py -recursive-include pdal *.pyx -recursive-include pdal *.hpp diff --git a/PKG-INFO b/PKG-INFO deleted file mode 100644 index 7d2bfc03..00000000 --- a/PKG-INFO +++ /dev/null @@ -1,75 +0,0 @@ -Metadata-Version: 1.1 -Name: PDAL -Version: 1.6.0 -Summary: Point cloud data processing
-Home-page: http://pdal.io -Author: Howard Butler -Author-email: howard@hobu.co -License: BSD -Description: ================================================================================ - PDAL - ================================================================================ - - The PDAL Python extension allows you to process data with PDAL into `Numpy`_ - arrays. Additionally, you can use it to fetch `schema`_ and `metadata`_ from - PDAL operations. - - Usage - -------------------------------------------------------------------------------- - - Given the following pipeline, which simply reads an `ASPRS LAS`_ file and - sorts it by the ``X`` dimension: - - .. _`ASPRS LAS`: https://www.asprs.org/committee-general/laser-las-file-format-exchange-activities.html - - .. code-block:: python - - - json = """ - { - "pipeline": [ - "1.2-with-color.las", - { - "type": "filters.sort", - "dimension": "X" - } - ] - }""" - - import pdal - pipeline = pdal.Pipeline(json) - pipeline.validate() # check if our JSON and options were good - pipeline.loglevel = 9 #really noisy - count = pipeline.execute() - arrays = pipeline.arrays - metadata = pipeline.metadata - log = pipeline.log - - - .. _`Numpy`: http://www.numpy.org/ - .. _`schema`: http://www.pdal.io/dimensions.html - .. 
_`metadata`: http://www.pdal.io/development/metadata.html - - Requirements - ================================================================================ - - * PDAL 1.4+ - * Python >=2.7 (including Python 3.x) - - - - Changes - ================================================================================ - - -Keywords: point cloud spatial -Platform: UNKNOWN -Classifier: Development Status :: 5 - Production/Stable -Classifier: Intended Audience :: Developers -Classifier: Intended Audience :: Science/Research -Classifier: License :: OSI Approved :: BSD License -Classifier: Operating System :: OS Independent -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Topic :: Scientific/Engineering :: GIS -Requires: Python (>=2.7) diff --git a/README.rst b/README.rst index 7203e881..ab90fd28 100644 --- a/README.rst +++ b/README.rst @@ -2,17 +2,52 @@ PDAL ================================================================================ -The PDAL Python extension allows you to process data with PDAL into `Numpy`_ -arrays. Additionally, you can use it to fetch `schema`_ and `metadata`_ from -PDAL operations. +PDAL Python support allows you to process data with PDAL into `Numpy`_ arrays. +It provides a PDAL extension module to control Python interaction with PDAL. +Additionally, you can use it to fetch `schema`_ and `metadata`_ from PDAL operations. + +Installation +-------------------------------------------------------------------------------- + +**Note** The PDAL Python bindings require the PDAL base library installed. Source code can be found at https://pdal.io and `GitHub `__. + +PyPI +................................................................................ + +PDAL Python support is installable via PyPI: + +.. code-block:: + + pip install PDAL + + +Developers can control many settings including debug builds and where the libraries are installed +using `scikit-build-core `_ settings: + +.. 
code-block:: + + python -m pip install \ + -Cbuild-dir=build \ + -e \ + . \ + --config-settings=cmake.build-type="Debug" \ + -vv \ + --no-deps \ + --no-build-isolation + +GitHub +................................................................................ The repository for PDAL's Python extension is available at https://github.com/PDAL/python -It is released independently from PDAL itself as of PDAL 1.7. +Python support is released independently from PDAL itself as of PDAL 1.7. Usage -------------------------------------------------------------------------------- +Simple +................................................................................ + Given the following pipeline, which simply reads an `ASPRS LAS`_ file and sorts it by the ``X`` dimension: @@ -34,30 +69,430 @@ sorts it by the ``X`` dimension: import pdal pipeline = pdal.Pipeline(json) - pipeline.validate() # check if our JSON and options were good - pipeline.loglevel = 8 #really noisy count = pipeline.execute() arrays = pipeline.arrays metadata = pipeline.metadata log = pipeline.log +Programmatic Pipeline Construction +................................................................................ + +The previous example specified the pipeline as a JSON string. Alternatively, a +pipeline can be constructed by creating ``Stage`` instances and piping them +together. For example, the previous pipeline can be specified as: + +.. code-block:: python + + pipeline = pdal.Reader("1.2-with-color.las") | pdal.Filter.sort(dimension="X") + +Stage Objects +============= + +- A stage is an instance of ``pdal.Reader``, ``pdal.Filter`` or ``pdal.Writer``. +- A stage can be instantiated by passing as keyword arguments the options + applicable to the respective PDAL stage. For more on PDAL stages and their + options, check the PDAL documentation on `Stage Objects <https://pdal.io/pipeline.html#stage-objects>`__.
+ + - The ``filename`` option of ``Readers`` and ``Writers`` as well as the ``type`` + option of ``Filters`` can be passed positionally as the first argument. + - The ``inputs`` option specifies a sequence of stages to be set as input to the + current stage. Each input can be either the string tag of another stage, or + the ``Stage`` instance itself. +- The ``Reader``, ``Filter`` and ``Writer`` classes come with static methods for + all the respective PDAL drivers. For example, ``pdal.Filter.head()`` is a + shortcut for ``pdal.Filter(type="filters.head")``. These methods are + auto-generated by introspecting ``pdal`` and the available options are + included in each method's docstring: + +.. code-block:: + + >>> help(pdal.Filter.head) + Help on function head in module pdal.pipeline: + + head(**kwargs) + Return N points from beginning of the point cloud. + + user_data: User JSON + log: Debug output filename + option_file: File from which to read additional options + where: Expression describing points to be passed to this filter + where_merge='auto': If 'where' option is set, describes how skipped points should be merged with kept points in standard mode. + count='10': Number of points to return from beginning. If 'invert' is true, number of points to drop from the beginning. + invert='false': If true, 'count' specifies the number of points to skip from the beginning. + +Pipeline Objects +================ + +A ``pdal.Pipeline`` instance can be created from: + +- a JSON string: ``Pipeline(json_string)`` +- a sequence of ``Stage`` instances: ``Pipeline([stage1, stage2])`` +- a single ``Stage`` with the ``Stage.pipeline`` method: ``stage.pipeline()`` +- nothing: ``Pipeline()`` creates a pipeline with no stages. 
+- joining ``Stage`` and/or other ``Pipeline`` instances together with the pipe + operator (``|``): + + - ``stage1 | stage2`` + - ``stage1 | pipeline1`` + - ``pipeline1 | stage1`` + - ``pipeline1 | pipeline2`` + +Every application of the pipe operator creates a new ``Pipeline`` instance. To +update an existing ``Pipeline`` use the respective in-place pipe operator (``|=``): + +.. code-block:: python + + # update pipeline in-place + pipeline = pdal.Pipeline() + pipeline |= stage + pipeline |= pipeline2 + +Reading using Numpy Arrays +................................................................................ + +The following more complex scenario demonstrates the full cycling between +PDAL and Python: + +* Read a small testfile from GitHub into a Numpy array +* Filter the array with Numpy for Intensity +* Pass the filtered array to PDAL to be filtered again +* Write the final filtered array to a LAS file and a TileDB_ array + via the `TileDB-PDAL integration`_ using the `TileDB writer plugin`_ + +.. code-block:: python + + import pdal + + data = "https://github.com/PDAL/PDAL/blob/master/test/data/las/1.2-with-color.las?raw=true" + + pipeline = pdal.Reader.las(filename=data).pipeline() + print(pipeline.execute()) # 1065 points + + # Get the data from the first array + # [array([(637012.24, 849028.31, 431.66, 143, 1, + # 1, 1, 0, 1, -9., 132, 7326, 245380.78254963, 68, 77, 88), + # dtype=[('X', '<f8'), ...])] + arr = pipeline.arrays[0] + + # Filter the array with Numpy, keeping points with Intensity > 30 + intensity = arr[arr["Intensity"] > 30] + print(len(intensity)) # 704 points + + # Now use pdal to clamp points that have intensity 100 <= v < 300 + pipeline = pdal.Filter.expression(expression="Intensity >= 100 && Intensity < 300").pipeline(intensity) + print(pipeline.execute()) # 387 points + clamped = pipeline.arrays[0] + + # Write our intensity data to a LAS file and a TileDB array. For TileDB it is + # recommended to use Hilbert ordering by default with geospatial point cloud data, + # which requires specifying a domain extent.
This can be determined automatically + # from a stats filter that computes statistics about each dimension (min, max, etc.). + pipeline = pdal.Writer.las( + filename="clamped.las", + offset_x="auto", + offset_y="auto", + offset_z="auto", + scale_x=0.01, + scale_y=0.01, + scale_z=0.01, + ).pipeline(clamped) + pipeline |= pdal.Filter.stats() | pdal.Writer.tiledb(array_name="clamped") + print(pipeline.execute()) # 387 points + + # Dump the TileDB array schema + import tiledb + with tiledb.open("clamped") as a: + print(a.schema) + +Reading using Numpy Arrays as buffers (advanced) +................................................................................ + +It's also possible to treat the Numpy arrays passed to PDAL as buffers that are iteratively populated through +custom python functions during the execution of the pipeline. + +This may be useful in cases where you want the reading of the input data to be handled in a streamable fashion, +like for example: + +* When the total Numpy array data wouldn't fit into memory. +* To initiate execution of a streamable PDAL pipeline while the input data is still being read. + +To enable this mode, you just need to include the python populate function along with each corresponding Numpy array. + +.. code-block:: python + + # Numpy array to be used as buffer + in_buffer = np.zeros(max_chunk_size, dtype=[("X", float), ("Y", float), ("Z", float)]) + + # The function to populate the buffer iteratively + def load_next_chunk() -> int: + """ + Function called by PDAL before reading the data from the buffer. + + IMPORTANT: must return the total number of items to be read from the buffer. + The Pipeline execution will keep calling this function in a loop until 0 is returned. 
+ """ + # + # Replace here with your code that populates the buffer and returns the number of elements to read + # + chunk_size = next_chunk.size + in_buffer[:chunk_size]["X"] = next_chunk[:]["X"] + in_buffer[:chunk_size]["Y"] = next_chunk[:]["Y"] + in_buffer[:chunk_size]["Z"] = next_chunk[:]["Z"] + + return chunk_size + + # Configure input array and handler during Pipeline initialization... + p = pdal.Pipeline(pipeline_json, arrays=[in_buffer], stream_handlers=[load_next_chunk]) + + # ...alternatively you can use the setter on an existing Pipeline + # p.inputs = [(in_buffer, load_next_chunk)] + +The following snippet provides a simple example of how to use a Numpy array as buffer to support writing through PDAL +with total control over the maximum amount of memory to use. + +Example: Streaming the read and write of a very large LAZ file with low memory footprint +......................................................................................... + + + +.. code-block:: python + + import numpy as np + import pdal + + in_chunk_size = 10_000_000 + in_pipeline = pdal.Reader.las(**{ + "filename": "in_test.laz" + }).pipeline() + + in_pipeline_it = in_pipeline.iterator(in_chunk_size).__iter__() + + out_chunk_size = 50_000_000 + out_file = "out_test.laz" + out_pipeline = pdal.Writer.las( + filename=out_file + ).pipeline() + + out_buffer = np.zeros(in_chunk_size, dtype=[("X", float), ("Y", float), ("Z", float)]) + + def load_next_chunk(): + try: + next_chunk = next(in_pipeline_it) + except StopIteration: + # Stops the streaming + return 0 + + chunk_size = next_chunk.size + out_buffer[:chunk_size]["X"] = next_chunk[:]["X"] + out_buffer[:chunk_size]["Y"] = next_chunk[:]["Y"] + out_buffer[:chunk_size]["Z"] = next_chunk[:]["Z"] + + print(f"Loaded next chunk -> {chunk_size}") + + return chunk_size + + out_pipeline.inputs = [(out_buffer, load_next_chunk)] + + out_pipeline.loglevel = 20 # INFO + count = out_pipeline.execute_streaming(out_chunk_size) + + print(f"\nWROTE - 
{count}") + + + +Executing Streamable Pipelines +................................................................................ +Streamable pipelines (pipelines that consist exclusively of streamable PDAL +stages) can be executed in streaming mode via ``Pipeline.iterator()``. This +returns an iterator object that yields Numpy arrays of up to ``chunk_size`` size +(default=10000) at a time. + +.. code-block:: python + + import pdal + pipeline = pdal.Reader("test/data/autzen-utm.las") | pdal.Filter.expression(expression="Intensity > 80 && Intensity < 120") + for array in pipeline.iterator(chunk_size=500): + print(len(array)) + # or to concatenate all arrays into one + # full_array = np.concatenate(list(pipeline)) + +``Pipeline.iterator()`` also takes an optional ``prefetch`` parameter (default=0) +to allow prefetching up to this number of arrays in parallel and buffering +them until they are yielded to the caller. + +If you just want to execute a streamable pipeline in streaming mode and don't +need to access the data points (typically when the pipeline has Writer stage(s)), +you can use the ``Pipeline.execute_streaming(chunk_size)`` method instead. This +is functionally equivalent to ``sum(map(len, pipeline.iterator(chunk_size)))`` +but more efficient as it avoids allocating and filling any arrays in memory. + +Accessing Mesh Data +................................................................................ + +Some PDAL stages (for instance ``filters.delaunay``) create TIN type mesh data. + +This data can be accessed in Python using the ``Pipeline.meshes`` property, which returns a ``numpy.ndarray`` +of shape (1,n) where n is the number of Triangles in the mesh. + +If the PointView contains no mesh data, then n = 0. + +Each Triangle is a tuple ``(A,B,C)`` where A, B and C are indices into the PointView identifying the point that is the vertex for the Triangle.
+ +Meshio Integration +................................................................................ + +The meshes property provides the face data but is not easy to use as a mesh. Therefore, we have provided optional integration +into the `Meshio <https://github.com/nschloe/meshio>`__ library. + +The ``pdal.Pipeline`` class provides the ``get_meshio(idx: int) -> meshio.Mesh`` method. This +method creates a ``Mesh`` object from the ``PointView`` array and mesh properties. + +.. note:: The meshio integration requires that meshio is installed (e.g. ``pip install meshio``). If it is not, then the method fails with an informative RuntimeError. + +Simple use of the functionality could be as follows: + +.. code-block:: python + + import pdal + + ... + pl = pdal.Pipeline(pipeline) + pl.execute() + + mesh = pl.get_meshio(0) + mesh.write('test.obj') + +Advanced Mesh Use Case +................................................................................ + +USE-CASE : Take a LiDAR map, create a mesh from the ground points, split into tiles and store the tiles in PostGIS. + +.. note:: Like ``Pipeline.arrays``, ``Pipeline.meshes`` returns a list of ``numpy.ndarray`` to provide for the case where the output from a Pipeline is multiple PointViews. + +(example using 1.2-with-color.las and not doing the ground classification for clarity) + +..
code-block:: python + + import pdal + import psycopg2 + import io + + pl = ( + pdal.Reader(".../python/test/data/1.2-with-color.las") + | pdal.Filter.splitter(length=1000) + | pdal.Filter.delaunay() + ) + pl.execute() + + conn = psycopg2.connect(%CONNECTION_STRING%) + buffer = io.StringIO() + + for idx in range(len(pl.meshes)): + m = pl.get_meshio(idx) + if m: + m.write(buffer, file_format = "wkt") + with conn.cursor() as curr: + curr.execute( + "INSERT INTO %table-name% (mesh) VALUES (ST_GeomFromEWKT(%(ewkt)s))", + { "ewkt": buffer.getvalue()} + ) + + conn.commit() + conn.close() + buffer.close() + + +Digital Terrain Model (DTM) Creation Example +................................................................................ + +The following is a script sample that can be used to create a DTM from a PDAL- +readable pointcloud. + +Method: + +#. read point cloud file +#. remove noise +#. clean up invalid values +#. classify ground points using `SMRF <https://pdal.io/stages/filters.smrf.html>`__ +#. write with `GDAL writer <https://pdal.io/stages/writers.gdal.html>`__ + +.. note:: If your pointcloud already has ground classified, you can skip all but + the reader and writer and achieve the same result. + +..
code-block:: python + + import pdal + + pc_path = 'https://github.com/PDAL/data/raw/refs/heads/main/autzen/autzen.laz' + out_file = 'autzen_dtm.tif' + + + # read + reader = pdal.Reader.las(pc_path) + + # remove noisy points + lownoise_filter = pdal.Filter.range( + limits='Classification![7:7]', tag='lownoise' + ) + highnoise_filter = pdal.Filter.range( + limits='Classification![18:]', tag='highnoise' + ) + + # saving incorrectly labeled returns here, some people want this, some don't + prepare_ground = pdal.Filter.assign( + value=[ + 'Classification=0', + 'ReturnNumber=1 WHERE ReturnNumber < 1', + 'NumberOfReturns=1 WHERE NumberOfReturns < 1', + ], + tag='prepare_ground_classifier', + ) + + # classify ground + smrf_classifier = pdal.Filter.smrf(tag='ground_classifier') + + # write with gdal, resolution in feet for autzen + gdal_writer = pdal.Writer.gdal( + filename=out_file, + where='Classification == 2', + data_type='float32', + resolution=10, + output_type='idw', + window_size=3, + pdal_metadata=True, + ) + + # collect pdal stages and execute pipeline + pipeline = ( + reader + | lownoise_filter + | highnoise_filter + | prepare_ground + | smrf_classifier + | gdal_writer + ) + pipeline.execute() + .. _`Numpy`: http://www.numpy.org/ .. _`schema`: http://www.pdal.io/dimensions.html .. _`metadata`: http://www.pdal.io/development/metadata.html +.. _`TileDB`: https://tiledb.com/ +.. _`TileDB-PDAL integration`: https://docs.tiledb.com/geospatial/pdal +.. _`TileDB writer plugin`: https://pdal.io/stages/writers.tiledb.html - -.. image:: https://travis-ci.org/PDAL/python.svg?branch=master - :target: https://travis-ci.org/PDAL/python - -.. image:: https://ci.appveyor.com/api/projects/status/of4kecyahpo8892d - :target: https://ci.appveyor.com/project/hobu/python/ +..
image:: https://github.com/PDAL/python/workflows/Build/badge.svg + :target: https://github.com/PDAL/python/actions?query=workflow%3ABuild Requirements ================================================================================ -* PDAL 1.7+ -* Python >=2.7 (including Python 3.x) -* Cython (eg :code:`pip install cython`) -* Packaging (eg :code:`pip install packaging`) - +* PDAL 2.7+ +* Python >=3.9 +* Pybind11 (eg :code:`pip install pybind11[global]`) +* Numpy >= 1.22 (eg :code:`pip install numpy`) +* scikit-build-core (eg :code:`pip install scikit-build-core`) diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index 7677b24b..00000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,5 +0,0 @@ -jobs: - - template: ./scripts/azp/linux.yml - - template: ./scripts/azp/win.yml - - template: ./scripts/azp/osx.yml - diff --git a/debug.sh b/debug.sh deleted file mode 100755 index 80f394b2..00000000 --- a/debug.sh +++ /dev/null @@ -1 +0,0 @@ -DYLD_LIBRARY_PATH=/Users/hobu/pdal-build/lib /Applications/Xcode.app/Contents/Developer/usr/bin/lldb -- python3 setup.py test diff --git a/pdal/PyPipeline.cpp b/pdal/PyPipeline.cpp deleted file mode 100644 index 66c613e3..00000000 --- a/pdal/PyPipeline.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/****************************************************************************** -* Copyright (c) 2016, Howard Butler (howard@hobu.co) -* -* All rights reserved. -* -* Redistribution and use in source and binary forms, with or without -* modification, are permitted provided that the following -* conditions are met: -* -* * Redistributions of source code must retain the above copyright -* notice, this list of conditions and the following disclaimer. -* * Redistributions in binary form must reproduce the above copyright -* notice, this list of conditions and the following disclaimer in -* the documentation and/or other materials provided -* with the distribution. -* * Neither the name of Hobu, Inc. 
or Flaxen Geo Consulting nor the -* names of its contributors may be used to endorse or promote -* products derived from this software without specific prior -* written permission. -* -* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS -* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, -* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS -* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY -* OF SUCH DAMAGE. -****************************************************************************/ - -#include "PyPipeline.hpp" - -#ifndef _WIN32 -#include -#endif - -#include -#include - -#include -#include - -#include "PyArray.hpp" - -namespace pdal -{ -namespace python -{ - -// Create a pipeline for writing data to PDAL -Pipeline::Pipeline(std::string const& json, std::vector arrays) : - m_executor(new PipelineExecutor(json)) -{ -#ifndef _WIN32 - // See comment in alternate constructor below. 
- ::dlopen("libpdal_base.so", RTLD_NOLOAD | RTLD_GLOBAL); -#endif - - if (_import_array() < 0) - throw pdal_error("Could not impory numpy.core.multiarray."); - - PipelineManager& manager = m_executor->getManager(); - - std::stringstream strm(json); - manager.readPipeline(strm); - std::vector roots = manager.roots(); - if (roots.size() != 1) - throw pdal_error("Filter pipeline must contain a single root stage."); - - for (auto array : arrays) - { - // Create numpy reader for each array - // Options - - Options options; - options.add("order", array->rowMajor() ? - MemoryViewReader::Order::RowMajor : - MemoryViewReader::Order::ColumnMajor); - options.add("shape", MemoryViewReader::Shape(array->shape())); - - Stage& s = manager.makeReader("", "readers.memoryview", options); - MemoryViewReader& r = dynamic_cast(s); - for (auto f : array->fields()) - r.pushField(f); - - ArrayIter& iter = array->iterator(); - auto incrementer = [&iter](PointId id) -> char * - { - if (! iter) - return nullptr; - - char *c = *iter; - ++iter; - return c; - }; - - r.setIncrementer(incrementer); - PyObject* parray = (PyObject*)array->getPythonArray(); - if (!parray) - throw pdal_error("array was none!"); - - roots[0]->setInput(r); - } - - manager.validateStageOptions(); -} - -// Create a pipeline for reading data from PDAL -Pipeline::Pipeline(std::string const& json) : - m_executor(new PipelineExecutor(json)) -{ - // Make the symbols in pdal_base global so that they're accessible - // to PDAL plugins. Python dlopen's this extension with RTLD_LOCAL, - // which means that without this, symbols in libpdal_base aren't available - // for resolution of symbols on future runtime linking. This is an issue - // on Alpine and other Linux variants that don't use UNIQUE symbols - // for C++ template statics only. Without this, you end up with multiple - // copies of template statics. 
-#ifndef _WIN32 - ::dlopen("libpdal_base.so", RTLD_NOLOAD | RTLD_GLOBAL); -#endif - if (_import_array() < 0) - throw pdal_error("Could not impory numpy.core.multiarray."); -} - -Pipeline::~Pipeline() -{} - - -void Pipeline::setLogLevel(int level) -{ - m_executor->setLogLevel(level); -} - - -int Pipeline::getLogLevel() const -{ - return static_cast(m_executor->getLogLevel()); -} - - -int64_t Pipeline::execute() -{ - return m_executor->execute(); -} - -bool Pipeline::validate() -{ - auto res = m_executor->validate(); - return res; -} - -std::vector Pipeline::getArrays() const -{ - std::vector output; - - if (!m_executor->executed()) - throw python_error("call execute() before fetching arrays"); - - const PointViewSet& pvset = m_executor->getManagerConst().views(); - - for (auto i: pvset) - { - //ABELL - Leak? - Array *array = new python::Array; - array->update(i); - output.push_back(array); - } - return output; -} - -} // namespace python -} // namespace pdal - diff --git a/pdal/__init__.py b/pdal/__init__.py deleted file mode 100644 index 4d775cb6..00000000 --- a/pdal/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -__version__='2.2.2' - -from .pipeline import Pipeline -from .array import Array -from .dimension import dimensions - -from pdal.libpdalpython import getVersionString, getVersionMajor, getVersionMinor, getVersionPatch, getSha1, getDebugInformation, getPluginInstallPath - -class Info(object): - version = getVersionString() - major = getVersionMajor() - minor = getVersionMinor() - patch = getVersionPatch() - debug = getDebugInformation() - sha1 = getSha1() - plugin = getPluginInstallPath() - -info = Info() diff --git a/pdal/array.py b/pdal/array.py deleted file mode 100644 index d9859278..00000000 --- a/pdal/array.py +++ /dev/null @@ -1,8 +0,0 @@ -import numpy as np -from pdal import libpdalpython - -class Array(object): - """A Numpy Array that can speak PDAL""" - - def __init__(self, data): - self.p = libpdalpython.PyArray(data) diff --git a/pdal/dimension.py 
b/pdal/dimension.py deleted file mode 100644 index 867609b1..00000000 --- a/pdal/dimension.py +++ /dev/null @@ -1,6 +0,0 @@ -import numpy as np -from pdal import libpdalpython -from pdal.libpdalpython import getDimensions - -dimensions = getDimensions() - diff --git a/pdal/libpdalpython.pyx b/pdal/libpdalpython.pyx deleted file mode 100644 index 690b0de1..00000000 --- a/pdal/libpdalpython.pyx +++ /dev/null @@ -1,170 +0,0 @@ -# distutils: language = c++ -# cython: c_string_type=unicode, c_string_encoding=utf8 - -from libcpp.vector cimport vector -from libcpp.string cimport string -from libc.stdint cimport uint32_t, int64_t -from libcpp cimport bool -from cpython.version cimport PY_MAJOR_VERSION -cimport numpy as np -np.import_array() - -from cpython cimport PyObject, Py_INCREF -from cython.operator cimport dereference as deref, preincrement as inc - -cdef extern from "pdal/pdal_config.hpp" namespace "pdal::Config": - cdef int versionMajor() except + - cdef int versionMinor() except + - cdef int versionPatch() except + - cdef string sha1() except+ - cdef string debugInformation() except+ - cdef string pluginInstallPath() except+ - cdef string versionString() except+ - -def getVersionString(): - return versionString() -def getVersionMajor(): - return versionMajor() -def getVersionMinor(): - return versionMinor() -def getVersionPatch(): - return versionPatch() -def getSha1(): - return sha1() -def getDebugInformation(): - return debugInformation() -def getPluginInstallPath(): - return pluginInstallPath() - -cdef extern from "PyArray.hpp" namespace "pdal::python": - cdef cppclass Array: - Array(np.ndarray) except + - void *getPythonArray() except+ - -cdef extern from "PyPipeline.hpp" namespace "pdal::python": - cdef cppclass Pipeline: - Pipeline(const char* ) except + - Pipeline(const char*, vector[Array*]& ) except + - int64_t execute() except + - bool validate() except + - string getPipeline() except + - string getMetadata() except + - string getSchema() except + - 
string getLog() except + - vector[Array*] getArrays() except + - int getLogLevel() - void setLogLevel(int) - -cdef class PyArray: - cdef Array *thisptr - def __cinit__(self, np.ndarray array): - self.thisptr = new Array(array) - def __dealloc__(self): - del self.thisptr - -cdef extern from "PyDimension.hpp": - ctypedef struct Dimension: - string name; - string description; - int size; - string type; -## string units; // Not defined by PDAL yet - - cdef vector[Dimension] getValidDimensions() except + - - -def getDimensions(): - cdef vector[Dimension] c_dims; - c_dims = getValidDimensions() - output = [] - cdef vector[Dimension].iterator it = c_dims.begin() - while it != c_dims.end(): - ptr = deref(it) - d = {} - d['name'] = ptr.name - d['description'] = ptr.description - kind = ptr.type + str(ptr.size) - d['dtype'] = np.dtype(kind) - ptr = deref(it) - output.append(d) - inc(it) - return output - - -cdef class PyPipeline: - cdef Pipeline *thisptr # hold a c++ instance which we're wrapping - - - def __cinit__(self, unicode json, list arrays=None): - cdef char* x = NULL - cdef int n_arrays; - if arrays: - n_arrays = len(arrays) - - cdef vector[Array*] c_arrays; - cdef np.ndarray np_array; - cdef Array* a - - if arrays is not None: - for array in arrays: - a = new Array(array) - c_arrays.push_back(a) - - self.thisptr = new Pipeline(json.encode('UTF-8'), c_arrays) - else: - self.thisptr = new Pipeline(json.encode('UTF-8')) - - def __dealloc__(self): - del self.thisptr - - property pipeline: - def __get__(self): - return self.thisptr.getPipeline() - - property metadata: - def __get__(self): - return self.thisptr.getMetadata() - - property loglevel: - def __get__(self): - return self.thisptr.getLogLevel() - def __set__(self, v): - self.thisptr.setLogLevel(v) - - property log: - def __get__(self): - - return self.thisptr.getLog() - - property schema: - def __get__(self): - import json - - j = self.thisptr.getSchema() - return json.loads(j) - - property arrays: - - def 
__get__(self): - v = self.thisptr.getArrays() - output = [] - cdef vector[Array*].iterator it = v.begin() - cdef Array* a - while it != v.end(): - ptr = deref(it) - a = ptr#.get() - o = a.getPythonArray() - output.append(o) - inc(it) - return output - - - def execute(self): - if not self.thisptr: - raise Exception("C++ Pipeline object not constructed!") - return self.thisptr.execute() - - def validate(self): - if not self.thisptr: - raise Exception("C++ Pipeline object not constructed!") - return self.thisptr.validate() diff --git a/pdal/pio.py b/pdal/pio.py deleted file mode 100644 index fbbc15e0..00000000 --- a/pdal/pio.py +++ /dev/null @@ -1,145 +0,0 @@ -""" -This module provides a python-syntax interface for constructing and executing pdal-python json -pipelines. The API is not explicitly defined but stage names are validated against the pdal executable's drivers when possible. - -To construct pipeline stages, access the driver name from this module. This will create -a callable function where driver parameters can be specified as keyword arguments. For example: - ->>> from pdal import pio ->>> las_reader = pio.readers.las(filename="test.las") - -To construct a pipeline, sum stages together. - ->>> pipeline = pio.readers.las(filename="test.las") + pio.writers.ply(filename="test.ply") - -To execute a pipeline and return results, call `execute`. - ->>> arr = pipeline.execute() # returns a numpy structured array - -To access the pipelines as a dict (which may be dumped to json), call `spec`. 
- ->>> json.dumps(pipeline.spec) - -""" - -import types -import json -import subprocess -from functools import partial -from collections import defaultdict -from itertools import chain -import copy -import warnings - -import pdal - -try: - PDAL_DRIVERS_JSON = subprocess.run(["pdal", "--drivers", "--showjson"], capture_output=True).stdout - PDAL_DRIVERS = json.loads(PDAL_DRIVERS_JSON) - _PDAL_VALIDATE = True -except: - PDAL_DRIVERS = [] - _PDAL_VALIDATE = False - -DEFAULT_STAGE_PARAMS = defaultdict(dict) -DEFAULT_STAGE_PARAMS.update({ -# TODO: add stage specific default configurations -}) - - -class StageSpec(object): - def __init__(self, prefix, **kwargs): - self.prefix = prefix - self.key = ".".join([self.prefix, kwargs.get("type", "")]) - self.spec = DEFAULT_STAGE_PARAMS[self.key].copy() - self.spec.update(kwargs) - self.spec["type"] = self.key - # NOTE: special case to support reading files without passing an explicit reader - if (self.prefix in ["readers", "writers"]) and kwargs.get("type") == "auto": - del self.spec["type"] - - @property - def pipeline(self): - """ - Promote this stage to a `pdal.pio.PipelineSpec` with one `pdal.pio.StageSpec` - and return it. 
- """ - output = PipelineSpec() - output.add_stage(self) - return output - - def __getattr__(self, name): - if _PDAL_VALIDATE and (name not in dir(self)): - raise AttributeError(f"'{self.prefix}.{name}' is an invalid or unsupported PDAL stage") - return partial(self.__class__, self.prefix, type=name) - - def __str__(self): - return json.dumps(self.spec, indent=4) - - def __add__(self, other): - return self.pipeline + other - - def __dir__(self): - extra_keys = [e["name"][len(self.key):] for e in PDAL_DRIVERS if e["name"].startswith(self.key)] + ["auto"] - return super().__dir__() + [e for e in extra_keys if len(e) > 0] - - def execute(self): - return self.pipeline.execute() - - -readers = StageSpec("readers") -filters = StageSpec("filters") -writers = StageSpec("writers") - - -class PipelineSpec(object): - stages = [] - - def __init__(self, other=None): - if other is not None: - self.stages = copy.copy(other.stages) - - @property - def spec(self): - """ - Return a `dict` containing the pdal pipeline suitable for dumping to json - """ - return { - "pipeline": [stage.spec for stage in self.stages] - } - - def add_stage(self, stage): - """ - Add a StageSpec to the end of this pipeline, and return the updated result. - """ - assert isinstance(stage, StageSpec), "Expected StageSpec" - - self.stages.append(stage) - return self - - def __str__(self): - return json.dumps(self.spec, indent=4) - - def __add__(self, stage_or_pipeline): - assert isinstance(stage_or_pipeline, (StageSpec, PipelineSpec)), "Expected StageSpec or PipelineSpec" - - output = self.__class__(self) - if isinstance(stage_or_pipeline, StageSpec): - output.add_stage(stage_or_pipeline) - elif isinstance(stage_or_pipeline, PipelineSpec): - for stage in stage_or_pipeline.stages: - output.add_stage(stage) - return output - - def execute(self): - """ - Shortcut to execute and return the results of the pipeline. 
- """ - # TODO: do some validation before calling execute - - # TODO: some exception/error handling around pdal - pipeline = pdal.Pipeline(json.dumps(self.spec)) - # pipeline.validate() # NOTE: disabling this because it causes segfaults in certain cases - pipeline.execute() - - return pipeline.arrays[0] # NOTE: are there situation where arrays has multiple elements? diff --git a/pdal/pipeline.py b/pdal/pipeline.py deleted file mode 100644 index fca579e9..00000000 --- a/pdal/pipeline.py +++ /dev/null @@ -1,47 +0,0 @@ - -from pdal import libpdalpython -import numpy as np - -class Pipeline(object): - """A PDAL pipeline object, defined by JSON. See http://www.pdal.io/pipeline.html for more - information on how to define one""" - - def __init__(self, json, arrays=None): - - if arrays: - self.p = libpdalpython.PyPipeline(json, arrays) - else: - self.p = libpdalpython.PyPipeline(json) - - def get_metadata(self): - return self.p.metadata - metadata = property(get_metadata) - - def get_schema(self): - return self.p.schema - schema = property(get_schema) - - def get_pipeline(self): - return self.p.pipeline - pipeline = property(get_pipeline) - - def get_loglevel(self): - return self.p.loglevel - - def set_loglevel(self, v): - self.p.loglevel = v - loglevel = property(get_loglevel, set_loglevel) - - def get_log(self): - return self.p.log - log = property(get_log) - - def execute(self): - return self.p.execute() - - def validate(self): - return self.p.validate() - - def get_arrays(self): - return self.p.arrays - arrays = property(get_arrays) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..e1b98235 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,69 @@ +[project] +name = "pdal" +description = "Point cloud data processing" +readme = "README.rst" +requires-python = ">=3.9" +license = {file = "LICENSE.txt"} +keywords = ["point", "cloud", "spatial"] +authors = [ + {email = "howard@hobu.co"}, + {name = "Howard Butler"} +] +maintainers = [ + {name = 
"Howard Butler", email = "howard@hobu.co"} +] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Scientific/Engineering :: GIS", +] + +dependencies = [ + "numpy >= 1.22" +] + +dynamic = ["version"] + +[project.optional-dependencies] +test = [ + "pandas", + "meshio" +] + +[tool.setuptools] +package-dir = {"" = "src"} +zip-safe = false + +[project.urls] +homepage = "https://pdal.io" +documentation = "https://pdal.io" +repository = "https://github.com/PDAL/Python" +changelog = "https://github.com/PDAL/python/blob/main/README.rst" + +[build-system] +requires = ["scikit-build-core >= 0.9", "numpy >= 1.22", "pybind11[global]"] +build-backend = "scikit_build_core.build" + + +[tool.scikit-build] +build-dir = "build/{wheel_tag}" +sdist.exclude = [".github"] +sdist.cmake = true +cmake.build-type = "Release" +sdist.include = [ + "src", + "CMakeLists.txt" +] +build.verbose = false +logging.level = "ERROR" + +[tool.scikit-build.metadata.version] +provider = "scikit_build_core.metadata.regex" +input = "src/pdal/__init__.py" diff --git a/scripts/appveyor/build.cmd b/scripts/appveyor/build.cmd deleted file mode 100644 index 8306aebb..00000000 --- a/scripts/appveyor/build.cmd +++ /dev/null @@ -1,7 +0,0 @@ -call "%CONDA_ROOT%\Scripts\activate.bat" base -call conda install -c conda-forge -y pdal cython packaging - -call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 - -python setup.py build - diff --git a/scripts/appveyor/config.cmd b/scripts/appveyor/config.cmd deleted file mode 100644 index 3188a8bc..00000000 --- a/scripts/appveyor/config.cmd +++ /dev/null @@ -1,16 +0,0 @@ -@echo off - -set 
"CONDA_ROOT=C:\Miniconda3-x64" -set PATH=%CONDA_ROOT%;%CONDA_ROOT%\\scripts;%CONDA_ROOT%\\Library\\bin;%PATH%;C:\\Program Files (x86)\\CMake\\bin -conda config --set always_yes yes -conda config --add channels conda-forge -conda config --set auto_update_conda no -conda config --set channel_priority true -conda update -q --all -conda info -python -c "import sys; print(sys.version)" -python -c "import sys; print(sys.executable)" -python -c "import sys; print(sys.prefix)" - -dir - diff --git a/scripts/appveyor/test.cmd b/scripts/appveyor/test.cmd deleted file mode 100644 index 61c3b221..00000000 --- a/scripts/appveyor/test.cmd +++ /dev/null @@ -1,3 +0,0 @@ -call "%CONDA_ROOT%\Scripts\activate.bat" base - -python setup.py test diff --git a/scripts/azp/linux.yml b/scripts/azp/linux.yml deleted file mode 100644 index d5a98b99..00000000 --- a/scripts/azp/linux.yml +++ /dev/null @@ -1,21 +0,0 @@ -# -*- mode: yaml -*- - -jobs: -- job: linux - pool: - vmImage: ubuntu-16.04 - container: - image: pdal/pdal:latest - options: --privileged - timeoutInMinutes: 60 - steps: - - script: | - sudo apt-get update -y; sudo apt-get install python3-pip -y - pip3 install numpy packaging cython - displayName: 'Dependencies' - - script: | - python3 setup.py build - displayName: 'Build' - - script: | - python3 setup.py test - displayName: 'Test' diff --git a/scripts/azp/osx.yml b/scripts/azp/osx.yml deleted file mode 100644 index 1598d2ab..00000000 --- a/scripts/azp/osx.yml +++ /dev/null @@ -1,53 +0,0 @@ -# -*- mode: yaml -*- - -jobs: -- job: osx - pool: - vmImage: macOS-10.13 - timeoutInMinutes: 360 - steps: - - script: | - echo "Removing homebrew from Azure to avoid conflicts." 
- curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/uninstall > ~/uninstall_homebrew - chmod +x ~/uninstall_homebrew - ~/uninstall_homebrew -fq - rm ~/uninstall_homebrew - displayName: Remove homebrew - - bash: | - echo "##vso[task.prependpath]$CONDA/bin" - sudo chown -R $USER $CONDA - displayName: Add conda to PATH - - - script: | - ECHO ON - source activate base - conda create --yes --quiet --name pdal - displayName: Create conda environment - - - script: | - ECHO ON - source activate pdal - conda config --set always_yes True --set show_channel_urls True - conda install --yes --quiet --name pdal -c conda-forge cython packaging pdal numpy conda-forge-ci-setup=2 -y - displayName: Install PDAL - - script: | - source activate pdal - export CI=azure - source run_conda_forge_build_setup - env: { - OSX_FORCE_SDK_DOWNLOAD: "1" - } - displayName: Configure OSX SDK - - script: | - source activate pdal - pip install numpy packaging cython - displayName: 'Dependencies' - - script: | - source activate pdal - python setup.py build - displayName: 'Build' - - script: | - source activate pdal - python setup.py test - displayName: 'Test' - diff --git a/scripts/azp/win.yml b/scripts/azp/win.yml deleted file mode 100644 index 4abdc798..00000000 --- a/scripts/azp/win.yml +++ /dev/null @@ -1,40 +0,0 @@ - -jobs: -- job: win - - pool: - vmImage: vs2017-win2016 - timeoutInMinutes: 360 - steps: - - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" - displayName: Add conda to PATH - - - script: | - ECHO ON - call conda create --yes --quiet --name pdal - displayName: Create conda environment - - - script: | - ECHO ON - call activate pdal - call conda config --set always_yes True --set show_channel_urls True - call conda install --yes --quiet --name pdal -c conda-forge pdal numpy packaging cython -y - displayName: Install PDAL - - script: | - ECHO ON - call activate pdal - call "C:\Program Files (x86)\Microsoft Visual 
Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 - echo %PATH% - set CC=cl.exe - set CXX=cl.exe - python setup.py build - displayName: 'Build' - - script: | - ECHO ON - call activate pdal - call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 - set PYTHONHOME=%CONDA_PREFIX% - python setup.py test - displayName: 'Test' - - diff --git a/scripts/travis/script.sh b/scripts/travis/script.sh deleted file mode 100755 index 84f837c6..00000000 --- a/scripts/travis/script.sh +++ /dev/null @@ -1,5 +0,0 @@ -apt-get update -y; apt-get install python3-pip -y -pip3 install numpy packaging cython -cd /pdal/ -python3 setup.py build -python3 setup.py test diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 7f75bb2c..00000000 --- a/setup.cfg +++ /dev/null @@ -1,4 +0,0 @@ -[egg_info] -tag_build = -tag_date = 0 -tag_svn_revision = 0 diff --git a/setup.py b/setup.py deleted file mode 100644 index 5ab97be4..00000000 --- a/setup.py +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/bin/env python - -# Stolen from Shapely's setup.py -# Two environment variables influence this script. -# -# PDAL_LIBRARY_PATH: a path to a PDAL C++ shared library. -# -# PDAL_CONFIG: the path to a pdal-config program that points to PDAL version, -# headers, and libraries. -# -# NB: within this setup scripts, software versions are evaluated according -# to https://www.python.org/dev/peps/pep-0440/. - -import logging -import os -import platform -import sys -import numpy -from Cython.Build import cythonize - -USE_CYTHON = True -try: - from Cython.Build import cythonize -except ImportError: - USE_CYTHON = False - -ext = '.pyx' if USE_CYTHON else '.cpp' - -from setuptools import setup -from packaging.version import Version - - -logging.basicConfig() -log = logging.getLogger(__file__) - -# python -W all setup.py ... 
-if 'all' in sys.warnoptions: - log.level = logging.DEBUG - - -# Second try: use PDAL_CONFIG environment variable -if 'PDAL_CONFIG' in os.environ: - pdal_config = os.environ['PDAL_CONFIG'] - log.debug('pdal_config: %s', pdal_config) -else: - pdal_config = 'pdal-config' - # in case of windows... - if os.name in ['nt']: - pdal_config += '.bat' - - -def get_pdal_config(option): - '''Get configuration option from the `pdal-config` development utility - - This code was adapted from Shapely's geos-config stuff - ''' - import subprocess - pdal_config = globals().get('pdal_config') - if not pdal_config or not isinstance(pdal_config, str): - raise OSError('Path to pdal-config is not set') - try: - stdout, stderr = subprocess.Popen( - [pdal_config, option], - stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() - except OSError as ex: - # e.g., [Errno 2] No such file or directory - raise OSError( - 'Could not find pdal-config %r: %s' % (pdal_config, ex)) - if stderr and not stdout: - raise ValueError(stderr.strip()) - if sys.version_info[0] >= 3: - result = stdout.decode('ascii').strip() - else: - result = stdout.strip() - log.debug('%s %s: %r', pdal_config, option, result) - return result - -# Get the version from the pdal module -module_version = None -with open('pdal/__init__.py', 'r') as fp: - for line in fp: - if line.startswith("__version__"): - module_version = Version(line.split("=")[1].strip().strip("\"'")) - break - -if not module_version: - raise ValueError("Could not determine PDAL's version") - -# Handle UTF-8 encoding of certain text files. 
-open_kwds = {} -if sys.version_info >= (3,): - open_kwds['encoding'] = 'utf-8' - -with open('README.rst', 'r', **open_kwds) as fp: - readme = fp.read() - -with open('CHANGES.txt', 'r', **open_kwds) as fp: - changes = fp.read() - -long_description = readme + '\n\n' + changes - -include_dirs = [] -library_dirs = [] -libraries = [] -extra_link_args = [] -extra_compile_args = [] - -from setuptools.extension import Extension as DistutilsExtension - -PDALVERSION = None -if pdal_config and "clean" not in sys.argv: - # Collect other options from PDAL - try: - - # Running against different major versions is going to fail. - # Minor versions might too, depending on numpy. - for item in get_pdal_config('--python-version').split(): - if item: - # 2.7.4 or 3.5.2 - built_version = item.split('.') - built_major = int(built_version[0]) - running_major = int(sys.version_info[0]) - if built_major != running_major: - message = "Version mismatch. PDAL Python support was compiled against version %d.x but setup is running version is %d.x. 
" - raise Exception(message % (built_major, running_major)) - - # older versions of pdal-config do not include --python-version switch - except ValueError: - pass - PDALVERSION = Version(get_pdal_config('--version')) - - separator = ':' - if os.name in ['nt']: - separator = ';' - - for item in get_pdal_config('--includes').split(): - if item.startswith("-I"): - include_dirs.extend(item[2:].split(separator)) - - for item in get_pdal_config('--libs').split(): - if item.startswith("-L"): - library_dirs.extend(item[2:].split(separator)) - elif item.startswith("-l"): - libraries.append(item[2:]) - -include_dirs.append(numpy.get_include()) - -if platform.system() == 'Darwin': - extra_link_args.append('-Wl,-rpath,'+library_dirs[0]) - -DEBUG=True -if DEBUG: - if os.name != 'nt': - extra_compile_args += ['-g','-O0'] - -if PDALVERSION is not None and PDALVERSION < Version('2.0.0'): - raise Exception("PDAL version '%s' is not compatible with PDAL Python library version '%s'"%(PDALVERSION, module_version)) - - -if os.name in ['nt']: - if os.environ.get('OSGEO4W_ROOT'): - library_dirs = ['c:/%s/lib' % os.environ.get('OSGEO4W_ROOT')] - if os.environ.get('CONDA_PREFIX'): - prefix=os.path.expandvars('%CONDA_PREFIX%') - library_dirs = ['%s\Library\lib' % prefix] - - libraries = ['pdalcpp','pdal_util','ws2_32'] - - extra_compile_args = ['/DNOMINMAX',] - -if 'linux' in sys.platform or 'linux2' in sys.platform or 'darwin' in sys.platform: - extra_compile_args += ['-std=c++11', '-Wno-unknown-pragmas'] - if 'GCC' in sys.version: - # try to ensure the ABI for Conda GCC 4.8 - if '4.8' in sys.version: - extra_compile_args += ['-D_GLIBCXX_USE_CXX11_ABI=0'] - - - -sources=['pdal/libpdalpython'+ext, "pdal/PyPipeline.cpp", "pdal/PyArray.cpp" ] -extensions = [DistutilsExtension("*", - sources, - include_dirs=include_dirs, - library_dirs=library_dirs, - extra_compile_args=extra_compile_args, - libraries=libraries, - extra_link_args=extra_link_args,)] -if USE_CYTHON and "clean" not in sys.argv: - 
from Cython.Build import cythonize - extensions= cythonize(extensions, compiler_directives={'language_level':3}) - -setup_args = dict( - name = 'PDAL', - version = str(module_version), - requires = ['Python (>=3.0)', 'Numpy'], - description = 'Point cloud data processing', - license = 'BSD', - keywords = 'point cloud spatial', - author = 'Howard Butler', - author_email = 'howard@hobu.co', - maintainer = 'Howard Butler', - maintainer_email = 'howard@hobu.co', - url = 'http://pdal.io', - long_description = long_description, - test_suite = 'test', - packages = [ - 'pdal', - ], - classifiers = [ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: BSD License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Topic :: Scientific/Engineering :: GIS', - ], - cmdclass = {}, - install_requires = ['numpy', 'packaging', 'cython'], -) -setup(ext_modules=extensions, **setup_args) - diff --git a/setup.py.off b/setup.py.off new file mode 100644 index 00000000..a3fd2a22 --- /dev/null +++ b/setup.py.off @@ -0,0 +1,45 @@ +from skbuild import setup + +# Get the version from the pdal module +with open("pdal/__init__.py", "r") as fp: + for line in fp: + if line.startswith("__version__"): + version = line.split("=")[1].strip().strip("\"'") + break + else: + raise ValueError("Could not determine Python package version") + +with open("README.rst", "r", encoding="utf-8") as fp: + readme = fp.read() + +with open("CHANGES.txt", "r", encoding="utf-8") as fp: + changes = fp.read() + +setup( + name="pdal", + version=version, + description="Point cloud data processing", + license="BSD", + keywords="point cloud spatial", + author="Howard Butler", + author_email="howard@hobu.co", + maintainer="Howard Butler", + maintainer_email="howard@hobu.co", + url="https://pdal.io", + long_description=readme + "\n\n" + changes, 
+ long_description_content_type="text/x-rst", + packages=["pdal"], + install_requires=["numpy"], + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Topic :: Scientific/Engineering :: GIS", + ], +) diff --git a/pdal/PyArray.cpp b/src/pdal/PyArray.cpp similarity index 52% rename from pdal/PyArray.cpp rename to src/pdal/PyArray.cpp index 73e6410d..62b4875a 100644 --- a/pdal/PyArray.cpp +++ b/src/pdal/PyArray.cpp @@ -35,8 +35,6 @@ #include "PyArray.hpp" #include -#include - namespace pdal { namespace python @@ -79,7 +77,7 @@ Dimension::Type pdalType(int t) return Type::None; } -std::string toString(PyObject *pname) +std::string pyObjectToString(PyObject *pname) { PyObject* r = PyObject_Str(pname); if (!r) @@ -90,25 +88,25 @@ std::string toString(PyObject *pname) } // unnamed namespace -Array::Array() : m_array(nullptr) -{ - if (_import_array() < 0) - throw pdal_error("Could not import numpy.core.multiarray."); -} -Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true) -{ - if (_import_array() < 0) - throw pdal_error("Could not import numpy.core.multiarray."); +#if NPY_ABI_VERSION < 0x02000000 + #define PyDataType_FIELDS(descr) ((descr)->fields) + #define PyDataType_NAMES(descr) ((descr)->names) +#endif +Array::Array(PyArrayObject* array, std::shared_ptr stream_handler) + : m_array(array), m_rowMajor(true), m_stream_handler(std::move(stream_handler)) +{ Py_XINCREF(array); PyArray_Descr *dtype = PyArray_DTYPE(m_array); npy_intp ndims = PyArray_NDIM(m_array); npy_intp *shape = PyArray_SHAPE(m_array); - int numFields = (dtype->fields == Py_None) ? 
+ + PyObject* fields = PyDataType_FIELDS(dtype); + int numFields = (fields == Py_None) ? 0 : - static_cast(PyDict_Size(dtype->fields)); + static_cast(PyDict_Size(fields)); int xyz = 0; if (numFields == 0) @@ -119,7 +117,7 @@ Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true) } else { - PyObject *names_dict = dtype->fields; + PyObject *names_dict = fields; PyObject *names = PyDict_Keys(names_dict); PyObject *values = PyDict_Values(names_dict); if (!names || !values) @@ -127,7 +125,7 @@ Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true) for (int i = 0; i < numFields; ++i) { - std::string name = toString(PyList_GetItem(names, i)); + std::string name = python::pyObjectToString(PyList_GetItem(names, i)); if (name == "X") xyz |= 1; else if (name == "Y") @@ -163,153 +161,80 @@ Array::Array(PyArrayObject* array) : m_array(array), m_rowMajor(true) Array::~Array() { - if (m_array) - Py_XDECREF((PyObject *)m_array); -} - - -void Array::update(PointViewPtr view) -{ - if (m_array) - Py_XDECREF((PyObject *)m_array); - m_array = nullptr; // Just in case of an exception. - - Dimension::IdList dims = view->dims(); - npy_intp size = view->size(); - - PyObject *dtype_dict = (PyObject*)buildNumpyDescription(view); - if (!dtype_dict) - throw pdal_error("Unable to build numpy dtype " - "description dictionary"); - - PyArray_Descr *dtype = nullptr; - if (PyArray_DescrConverter(dtype_dict, &dtype) == NPY_FAIL) - throw pdal_error("Unable to build numpy dtype"); - Py_XDECREF(dtype_dict); - - // This is a 1 x size array. - m_array = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, - 1, &size, 0, nullptr, NPY_ARRAY_CARRAY, nullptr); - - // copy the data - DimTypeList types = view->dimTypes(); - for (PointId idx = 0; idx < view->size(); idx++) - { - char *p = (char *)PyArray_GETPTR1(m_array, idx); - view->getPackedPoint(types, idx, p); - } -} - - -//ABELL - Who's responsible for incrementing the ref count? 
-PyArrayObject *Array::getPythonArray() const -{ - return m_array; -} - -PyObject* Array::buildNumpyDescription(PointViewPtr view) const -{ - // Build up a numpy dtype dictionary - // - // {'formats': ['f8', 'f8', 'f8', 'u2', 'u1', 'u1', 'u1', 'u1', 'u1', - // 'f4', 'u1', 'u2', 'f8', 'u2', 'u2', 'u2'], - // 'names': ['X', 'Y', 'Z', 'Intensity', 'ReturnNumber', - // 'NumberOfReturns', 'ScanDirectionFlag', 'EdgeOfFlightLine', - // 'Classification', 'ScanAngleRank', 'UserData', - // 'PointSourceId', 'GpsTime', 'Red', 'Green', 'Blue']} - // - - Dimension::IdList dims = view->dims(); - - PyObject* dict = PyDict_New(); - PyObject* sizes = PyList_New(dims.size()); - PyObject* formats = PyList_New(dims.size()); - PyObject* titles = PyList_New(dims.size()); - - for (size_t i = 0; i < dims.size(); ++i) - { - Dimension::Id id = dims[i]; - Dimension::Type t = view->dimType(id); - npy_intp stride = view->dimSize(id); - - std::string name = view->dimName(id); - - std::string kind("i"); - Dimension::BaseType b = Dimension::base(t); - if (b == Dimension::BaseType::Unsigned) - kind = "u"; - else if (b == Dimension::BaseType::Signed) - kind = "i"; - else if (b == Dimension::BaseType::Floating) - kind = "f"; - else - throw pdal_error("Unable to map kind '" + kind + - "' to PDAL dimension type"); - - std::stringstream oss; - oss << kind << stride; - PyObject* pySize = PyLong_FromLong(stride); - PyObject* pyTitle = PyUnicode_FromString(name.c_str()); - PyObject* pyFormat = PyUnicode_FromString(oss.str().c_str()); - - PyList_SetItem(sizes, i, pySize); - PyList_SetItem(titles, i, pyTitle); - PyList_SetItem(formats, i, pyFormat); - } - - PyDict_SetItemString(dict, "names", titles); - PyDict_SetItemString(dict, "formats", formats); - - return dict; -} - -bool Array::rowMajor() const -{ - return m_rowMajor; + Py_XDECREF(m_array); } -Array::Shape Array::shape() const +std::shared_ptr Array::iterator() { - return m_shape; + return std::make_shared(m_array, m_stream_handler); } -const 
Array::Fields& Array::fields() const +ArrayIter::ArrayIter(PyArrayObject* np_array, std::shared_ptr stream_handler) + : m_stream_handler(std::move(stream_handler)) { - return m_fields; -} + // Create iterator + m_iter = NpyIter_New(np_array, + NPY_ITER_EXTERNAL_LOOP | NPY_ITER_READONLY | NPY_ITER_REFS_OK, + NPY_KEEPORDER, NPY_NO_CASTING, NULL); + if (!m_iter) + throw pdal_error("Unable to create numpy iterator."); -ArrayIter& Array::iterator() -{ - ArrayIter *it = new ArrayIter(*this); - m_iterators.push_back(std::unique_ptr(it)); - return *it; + initIterator(); } -ArrayIter::ArrayIter(Array& array) +void ArrayIter::initIterator() { - m_iter = NpyIter_New(array.getPythonArray(), - NPY_ITER_EXTERNAL_LOOP | NPY_ITER_READONLY | NPY_ITER_REFS_OK, - NPY_KEEPORDER, NPY_NO_CASTING, NULL); - if (!m_iter) - throw pdal_error("Unable to create numpy iterator."); + // For a stream handler, first execute it to get the buffer populated and know the size of the data to iterate + int64_t stream_chunk_size = 0; + if (m_stream_handler) { + stream_chunk_size = (*m_stream_handler)(); + if (!stream_chunk_size) { + m_done = true; + return; + } + } + // Initialize the iterator function char *itererr; m_iterNext = NpyIter_GetIterNext(m_iter, &itererr); if (!m_iterNext) { NpyIter_Deallocate(m_iter); - throw pdal_error(std::string("Unable to create numpy iterator: ") + - itererr); + m_iter = nullptr; + throw pdal_error(std::string("Unable to retrieve iteration function from numpy iterator: ") + itererr); } m_data = NpyIter_GetDataPtrArray(m_iter); - m_stride = NpyIter_GetInnerStrideArray(m_iter); - m_size = NpyIter_GetInnerLoopSizePtr(m_iter); + m_stride = *NpyIter_GetInnerStrideArray(m_iter); + m_size = *NpyIter_GetInnerLoopSizePtr(m_iter); + if (stream_chunk_size) { + // Ensure chunk size is valid and then limit iteration accordingly + if (0 < stream_chunk_size && stream_chunk_size <= m_size) { + m_size = stream_chunk_size; + } else { + throw pdal_error(std::string("Stream chunk size not 
in the range of array length: ") + + std::to_string(stream_chunk_size)); + } + } m_done = false; } +void ArrayIter::resetIterator() +{ + // Reset the iterator to the initial state + if (NpyIter_Reset(m_iter, NULL) != NPY_SUCCEED) { + NpyIter_Deallocate(m_iter); + m_iter = nullptr; + throw pdal_error("Unable to reset numpy iterator."); + } + + initIterator(); +} + ArrayIter::~ArrayIter() { - NpyIter_Deallocate(m_iter); + if (m_iter != nullptr) { + NpyIter_Deallocate(m_iter); + } } ArrayIter& ArrayIter::operator++() @@ -317,23 +242,18 @@ ArrayIter& ArrayIter::operator++() if (m_done) return *this; - if (--(*m_size)) - *m_data += *m_stride; - else if (!m_iterNext(m_iter)) - m_done = true; + if (--m_size) { + *m_data += m_stride; + } else if (!m_iterNext(m_iter)) { + if (m_stream_handler) { + resetIterator(); + } else { + m_done = true; + } + } return *this; } -ArrayIter::operator bool () const -{ - return !m_done; -} - -char * ArrayIter::operator * () const -{ - return *m_data; -} - } // namespace python } // namespace pdal diff --git a/pdal/PyArray.hpp b/src/pdal/PyArray.hpp similarity index 66% rename from pdal/PyArray.hpp rename to src/pdal/PyArray.hpp index d77b3d10..b2aca844 100644 --- a/pdal/PyArray.hpp +++ b/src/pdal/PyArray.hpp @@ -34,69 +34,86 @@ #pragma once -#include - +#include "export.hpp" #include + +#define NPY_TARGET_VERSION NPY_1_22_API_VERSION +#define NPY_NO_DEPRECATED_API NPY_1_22_API_VERSION + +#define NO_IMPORT_ARRAY +#define PY_ARRAY_UNIQUE_SYMBOL PDAL_ARRAY_API + #include +#include +#include + +#include +#include + namespace pdal { namespace python { + class ArrayIter; -class PDAL_DLL Array +using ArrayStreamHandler = std::function; + +class PDAL_EXPORT Array { public: using Shape = std::array; using Fields = std::vector; - // Create an array for reading data from PDAL. - Array(); - - // Create an array for writing data to PDAL. 
- Array(PyArrayObject* array); - + Array(PyArrayObject* array, std::shared_ptr stream_handler = {}); ~Array(); - void update(PointViewPtr view); - PyArrayObject *getPythonArray() const; - bool rowMajor() const; - Shape shape() const; - const Fields& fields() const; - ArrayIter& iterator(); -private: - inline PyObject* buildNumpyDescription(PointViewPtr view) const; + Array(Array&& a) = default; + Array& operator=(Array&& a) = default; + Array(const Array&) = delete; + Array() = delete; + bool rowMajor() const { return m_rowMajor; }; + Shape shape() const { return m_shape; } + const Fields& fields() const { return m_fields; }; + std::shared_ptr iterator(); + +private: PyArrayObject* m_array; - Array& operator=(Array const& rhs); Fields m_fields; bool m_rowMajor; Shape m_shape {}; - std::vector> m_iterators; + std::shared_ptr m_stream_handler; }; -class ArrayIter + +class PDAL_EXPORT ArrayIter { public: ArrayIter(const ArrayIter&) = delete; + ArrayIter() = delete; - ArrayIter(Array& array); + ArrayIter(PyArrayObject*, std::shared_ptr); ~ArrayIter(); ArrayIter& operator++(); - operator bool () const; - char *operator * () const; + operator bool () const { return !m_done; } + char* operator*() const { return *m_data; } private: - NpyIter *m_iter; + NpyIter *m_iter = nullptr; NpyIter_IterNextFunc *m_iterNext; char **m_data; - npy_intp *m_size; - npy_intp *m_stride; + npy_intp m_size; + npy_intp m_stride; bool m_done; + + std::shared_ptr m_stream_handler; + void initIterator(); + void resetIterator(); }; } // namespace python diff --git a/pdal/PyDimension.hpp b/src/pdal/PyDimension.hpp similarity index 99% rename from pdal/PyDimension.hpp rename to src/pdal/PyDimension.hpp index f49645af..faaac509 100644 --- a/pdal/PyDimension.hpp +++ b/src/pdal/PyDimension.hpp @@ -45,7 +45,7 @@ typedef struct Dimension std::string name; std::string description; std::string type; - int size; + size_t size; std::string units; } Dimension; diff --git a/src/pdal/PyPipeline.cpp 
b/src/pdal/PyPipeline.cpp new file mode 100644 index 00000000..7f295273 --- /dev/null +++ b/src/pdal/PyPipeline.cpp @@ -0,0 +1,406 @@ +/****************************************************************************** +* Copyright (c) 2016, Howard Butler (howard@hobu.co) +* +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following +* conditions are met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided +* with the distribution. +* * Neither the name of Hobu, Inc. or Flaxen Geo Consulting nor the +* names of its contributors may be used to endorse or promote +* products derived from this software without specific prior +* written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +* OF SUCH DAMAGE. 
+****************************************************************************/ + +#include "PyArray.hpp" +#include "PyPipeline.hpp" +#include + +#ifndef _WIN32 +#include +#endif + +namespace pdal +{ +namespace python +{ + + +void CountPointTable::reset() +{ + for (PointId idx = 0; idx < numPoints(); idx++) + if (!skip(idx)) + m_count++; + FixedPointTable::reset(); +} + + +PipelineExecutor::PipelineExecutor( + std::string const& json, std::vector> arrays, int level) +{ + if (level < 0 || level > 8) + throw pdal_error("log level must be between 0 and 8!"); + + LogPtr log(Log::makeLog("pypipeline", &m_logStream)); + log->setLevel(static_cast(level)); + m_manager.setLog(log); + + std::stringstream strm; + strm << json; + m_manager.readPipeline(strm); + + addArrayReaders(arrays); +} + + +point_count_t PipelineExecutor::execute(pdal::StringList allowedDims) +{ + if (allowedDims.size()) + { + m_manager.pointTable().layout()->setAllowedDims(allowedDims); + } + + point_count_t count = m_manager.execute(); + m_executed = true; + return count; +} + +std::string PipelineExecutor::getSrsWKT2() const +{ + std::string output(""); + pdal::PointTableRef pointTable = m_manager.pointTable(); + + + pdal::SpatialReference srs = pointTable.spatialReference(); + output = srs.getWKT(); + + return output; +} + +point_count_t PipelineExecutor::executeStream(point_count_t streamLimit, + pdal::StringList allowedDims) +{ + CountPointTable table(streamLimit); + if (allowedDims.size()) + { + pointTable().layout()->setAllowedDims(allowedDims); + } + m_manager.executeStream(table); + m_executed = true; + return table.count(); +} + +const PointViewSet& PipelineExecutor::views() const +{ + if (!m_executed) + throw pdal_error("Pipeline has not been executed!"); + + return m_manager.views(); +} + + +std::string PipelineExecutor::getPipeline() const +{ + std::stringstream strm; + pdal::PipelineWriter::writePipeline(m_manager.getStage(), strm); + return strm.str(); +} + + +std::string 
PipelineExecutor::getMetadata() const +{ + if (!m_executed) + throw pdal_error("Pipeline has not been executed!"); + + std::stringstream strm; + MetadataNode root = m_manager.getMetadata().clone("metadata"); + pdal::Utils::toJSON(root, strm); + return strm.str(); +} + + +std::string PipelineExecutor::getSchema() const +{ + if (!m_executed) + throw pdal_error("Pipeline has not been executed!"); + + std::stringstream strm; + MetadataNode root = pointTable().layout()->toMetadata().clone("schema"); + pdal::Utils::toJSON(root, strm); + return strm.str(); +} + + +MetadataNode computePreview(Stage* stage) +{ + if (!stage) + throw pdal_error("no valid stage in QuickInfo"); + + QuickInfo qi = stage->preview(); + if (!qi.valid()) + throw pdal_error("No summary data available for stage '" + stage->getName()+"'" ); + + std::stringstream strm; + MetadataNode summary(stage->getName()); + summary.add("num_points", qi.m_pointCount); + if (qi.m_srs.valid()) + { + MetadataNode srs = qi.m_srs.toMetadata(); + summary.add(srs); + } + if (qi.m_bounds.valid()) + { + MetadataNode bounds = Utils::toMetadata(qi.m_bounds); + summary.add(bounds.clone("bounds")); + } + + std::string dims; + auto di = qi.m_dimNames.begin(); + while (di != qi.m_dimNames.end()) + { + dims += *di; + ++di; + if (di != qi.m_dimNames.end()) + dims += ", "; + } + if (dims.size()) + summary.add("dimensions", dims); + + if (!qi.m_metadata.empty() && qi.m_metadata.valid()) + { + summary.add(qi.m_metadata.clone("metadata")); + } + + pdal::Utils::toJSON(summary, strm); + return summary; + +} + + +std::string PipelineExecutor::getQuickInfo() const +{ + + Stage* stage(nullptr); + std::vector stages = m_manager.stages(); + std::vector previewStages; + + for (auto const& s: stages) + { + auto n = s->getName(); + auto v = pdal::Utils::split2(n,'.'); + if (v.size() > 0) + if (pdal::Utils::iequals(v[0], "readers")) + previewStages.push_back(s); + } + + MetadataNode summary; + for (auto const& stage: previewStages) + { + 
MetadataNode n = computePreview(stage); + summary.add(n); + } + + std::stringstream strm; + pdal::Utils::toJSON(summary, strm); + return strm.str(); +} + +void PipelineExecutor::addArrayReaders(std::vector> arrays) +{ + + if (arrays.empty()) + return; + + std::vector roots = m_manager.roots(); + if (roots.size() != 1) + throw pdal_error("Filter pipeline must contain a single root stage."); + + for (auto array : arrays) + { + // Create numpy reader for each array + // Options + + Options options; + options.add("order", array->rowMajor() ? + MemoryViewReader::Order::RowMajor : + MemoryViewReader::Order::ColumnMajor); + options.add("shape", MemoryViewReader::Shape(array->shape())); + + Stage& s = m_manager.makeReader("", "readers.memoryview", options); + MemoryViewReader& r = dynamic_cast(s); + for (auto f : array->fields()) + r.pushField(f); + + auto arrayIter = array->iterator(); + auto incrementer = [arrayIter, firstPoint = true](PointId id) mutable -> char * + { + ArrayIter& iter = *arrayIter; + if (!firstPoint && iter) { + ++iter; + } else { + firstPoint = false; + } + + if (!iter) + return nullptr; + + char *c = *iter; + return c; + }; + + r.setIncrementer(incrementer); + roots[0]->setInput(r); + } + + m_manager.validateStageOptions(); +} + + +PyObject* buildNumpyDescriptor(PointLayoutPtr layout) +{ + // Build up a numpy dtype dictionary + // + // {'formats': ['f8', 'f8', 'f8', 'u2', 'u1', 'u1', 'u1', 'u1', 'u1', + // 'f4', 'u1', 'u2', 'f8', 'u2', 'u2', 'u2'], + // 'names': ['X', 'Y', 'Z', 'Intensity', 'ReturnNumber', + // 'NumberOfReturns', 'ScanDirectionFlag', 'EdgeOfFlightLine', + // 'Classification', 'ScanAngleRank', 'UserData', + // 'PointSourceId', 'GpsTime', 'Red', 'Green', 'Blue']} + // + + // Ensure that the dimensions are sorted by offset + // Is there a better way? Can they be sorted by offset already? 
+ auto sortByOffset = [layout](Dimension::Id id1, Dimension::Id id2) -> bool + { + return layout->dimOffset(id1) < layout->dimOffset(id2); + }; + + auto dims = layout->dims(); + std::sort(dims.begin(), dims.end(), sortByOffset); + + PyObject* names = PyList_New(dims.size()); + PyObject* formats = PyList_New(dims.size()); + for (size_t i = 0; i < dims.size(); ++i) + { + Dimension::Id id = dims[i]; + auto name = layout->dimName(id); + PyList_SetItem(names, i, PyUnicode_FromString(name.c_str())); + + std::stringstream format; + switch (Dimension::base(layout->dimType(id))) + { + case Dimension::BaseType::Unsigned: + format << 'u'; + break; + case Dimension::BaseType::Signed: + format << 'i'; + break; + case Dimension::BaseType::Floating: + format << 'f'; + break; + default: + throw pdal_error("Unable to map dimension '" + name + "' to Numpy"); + } + format << layout->dimSize(id); + PyList_SetItem(formats, i, PyUnicode_FromString(format.str().c_str())); + + } + PyObject* dtype_dict = PyDict_New(); + PyDict_SetItemString(dtype_dict, "names", names); + PyDict_SetItemString(dtype_dict, "formats", formats); + return dtype_dict; +} + + +PyArrayObject* viewToNumpyArray(PointViewPtr view) +{ + + PyObject* dtype_dict = buildNumpyDescriptor(view->layout()); + PyArray_Descr *dtype = nullptr; + if (PyArray_DescrConverter(dtype_dict, &dtype) == NPY_FAIL) + throw pdal_error("Unable to build numpy dtype"); + Py_XDECREF(dtype_dict); + + // This is a 1 x size array. 
+ npy_intp size = view->size(); + PyArrayObject* array = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, dtype, + 1, &size, 0, nullptr, NPY_ARRAY_CARRAY, nullptr); + + // copy the data + DimTypeList types = view->dimTypes(); + for (PointId idx = 0; idx < view->size(); idx++) + view->getPackedPoint(types, idx, (char *)PyArray_GETPTR1(array, idx)); + return array; +} + + +PyArrayObject* meshToNumpyArray(const TriangularMesh* mesh) +{ + // Build up a numpy dtype dictionary + // + // {'formats': ['f8', 'f8', 'f8', 'u2', 'u1', 'u1', 'u1', 'u1', 'u1', + // 'f4', 'u1', 'u2', 'f8', 'u2', 'u2', 'u2'], + // 'names': ['X', 'Y', 'Z', 'Intensity', 'ReturnNumber', + // 'NumberOfReturns', 'ScanDirectionFlag', 'EdgeOfFlightLine', + // 'Classification', 'ScanAngleRank', 'UserData', + // 'PointSourceId', 'GpsTime', 'Red', 'Green', 'Blue']} + // + PyObject* names = PyList_New(3); + PyList_SetItem(names, 0, PyUnicode_FromString("A")); + PyList_SetItem(names, 1, PyUnicode_FromString("B")); + PyList_SetItem(names, 2, PyUnicode_FromString("C")); + + PyObject* formats = PyList_New(3); + PyList_SetItem(formats, 0, PyUnicode_FromString("u4")); + PyList_SetItem(formats, 1, PyUnicode_FromString("u4")); + PyList_SetItem(formats, 2, PyUnicode_FromString("u4")); + + PyObject* dtype_dict = PyDict_New(); + PyDict_SetItemString(dtype_dict, "names", names); + PyDict_SetItemString(dtype_dict, "formats", formats); + + PyArray_Descr *dtype = nullptr; + if (PyArray_DescrConverter(dtype_dict, &dtype) == NPY_FAIL) + throw pdal_error("Unable to build numpy dtype"); + Py_XDECREF(dtype_dict); + + // This is a 1 x size array. + npy_intp size = mesh ? 
mesh->size() : 0; + PyArrayObject* array = (PyArrayObject*)PyArray_NewFromDescr(&PyArray_Type, dtype, + 1, &size, 0, nullptr, NPY_ARRAY_CARRAY, nullptr); + for (PointId idx = 0; idx < size; idx++) + { + char* p = (char *)PyArray_GETPTR1(array, idx); + const Triangle& t = (*mesh)[idx]; + uint32_t a = (uint32_t)t.m_a; + std::memcpy(p, &a, 4); + uint32_t b = (uint32_t)t.m_b; + std::memcpy(p + 4, &b, 4); + uint32_t c = (uint32_t)t.m_c; + std::memcpy(p + 8, &c, 4); + } + return array; +} + +} // namespace python +} // namespace pdal diff --git a/src/pdal/PyPipeline.hpp b/src/pdal/PyPipeline.hpp new file mode 100644 index 00000000..1eed023f --- /dev/null +++ b/src/pdal/PyPipeline.hpp @@ -0,0 +1,102 @@ +/****************************************************************************** +* Copyright (c) 2016, Howard Butler (howard@hobu.co) +* +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following +* conditions are met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided +* with the distribution. +* * Neither the name of Hobu, Inc. or Flaxen Geo Consulting nor the +* names of its contributors may be used to endorse or promote +* products derived from this software without specific prior +* written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +* OF SUCH DAMAGE. +****************************************************************************/ + +#pragma once + +#include "export.hpp" +#include + +#define NPY_TARGET_VERSION NPY_1_22_API_VERSION +#define NPY_NO_DEPRECATED_API NPY_1_22_API_VERSION + +#define NO_IMPORT_ARRAY +#define PY_ARRAY_UNIQUE_SYMBOL PDAL_ARRAY_API + +#include + +namespace pdal +{ +namespace python +{ + +PyObject* buildNumpyDescriptor(PointLayoutPtr layout); +PyArrayObject* viewToNumpyArray(PointViewPtr view); +PyArrayObject* meshToNumpyArray(const TriangularMesh* mesh); + +class Array; + +class PDAL_EXPORT PipelineExecutor { +public: + PipelineExecutor(std::string const& json, std::vector> arrays, int level); + virtual ~PipelineExecutor() = default; + + point_count_t execute(pdal::StringList allowedDims); + point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims); + + const PointViewSet& views() const; + std::string getPipeline() const; + std::string getMetadata() const; + std::string getQuickInfo() const; + std::string getSchema() const; + std::string getSrsWKT2() const; + PipelineManager const& getManager() const { return m_manager; } + std::string getLog() const { return m_logStream.str(); } + +protected: + virtual ConstPointTableRef pointTable() const { return m_manager.pointTable(); } + + pdal::PipelineManager m_manager; + bool m_executed = false; + +private: + void addArrayReaders(std::vector> arrays); + + std::stringstream m_logStream; +}; + 
+class CountPointTable : public FixedPointTable +{ +public: + CountPointTable(point_count_t capacity) : FixedPointTable(capacity), m_count(0) {} + point_count_t count() const { return m_count; } + +protected: + virtual void reset(); + +private: + point_count_t m_count; +}; + +} // namespace python +} // namespace pdal diff --git a/src/pdal/StreamableExecutor.cpp b/src/pdal/StreamableExecutor.cpp new file mode 100644 index 00000000..5fa01931 --- /dev/null +++ b/src/pdal/StreamableExecutor.cpp @@ -0,0 +1,242 @@ +/****************************************************************************** +* Copyright (c) 2016, Howard Butler (howard@hobu.co) +* +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following +* conditions are met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided +* with the distribution. +* * Neither the name of Hobu, Inc. or Flaxen Geo Consulting nor the +* names of its contributors may be used to endorse or promote +* products derived from this software without specific prior +* written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +* OF SUCH DAMAGE. +****************************************************************************/ + +#include "PyPipeline.hpp" +#include "StreamableExecutor.hpp" + +#define NO_IMPORT_ARRAY +#define PY_ARRAY_UNIQUE_SYMBOL PDAL_ARRAY_API + +#include +#include + +#include +#include + +namespace pdal +{ +namespace python +{ + +// PythonPointTable + +PythonPointTable::PythonPointTable(point_count_t limit, int prefetch) : + StreamPointTable(m_layout, limit), m_prefetch(prefetch), + m_curArray(nullptr), m_dtype(nullptr) +{} + +PythonPointTable::~PythonPointTable() +{ + auto gil = PyGILState_Ensure(); + Py_XDECREF(m_dtype); + Py_XDECREF(m_curArray); + PyGILState_Release(gil); +} + +void PythonPointTable::finalize() +{ + BasePointTable::finalize(); + + // create dtype + auto gil = PyGILState_Ensure(); + + PyObject *dtype_dict = buildNumpyDescriptor(&m_layout); + if (PyArray_DescrConverter(dtype_dict, &m_dtype) == NPY_FAIL) + throw pdal_error("Unable to create numpy dtype"); + Py_XDECREF(dtype_dict); + PyGILState_Release(gil); + + py_createArray(); +} + +void PythonPointTable::py_createArray() +{ + auto gil = PyGILState_Ensure(); + npy_intp size = capacity(); + Py_INCREF(m_dtype); + m_curArray = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type, m_dtype, + 1, &size, 0, nullptr, NPY_ARRAY_CARRAY, nullptr); + PyGILState_Release(gil); +} + +void PythonPointTable::py_resizeArray(point_count_t np) +{ + npy_intp sizes[1]; + sizes[0] = np; + PyArray_Dims dims{ sizes, 1 
}; + + auto gil = PyGILState_Ensure(); + // copy the non-skipped elements to the beginning + npy_intp dest_idx = 0; + for (PointId src_idx = 0; src_idx < numPoints(); src_idx++) + if (!skip(src_idx)) + { + if (src_idx != dest_idx) + { + PyObject* src_item = PyArray_GETITEM(m_curArray, (const char*) PyArray_GETPTR1(m_curArray, src_idx)); + PyArray_SETITEM(m_curArray, (char*) PyArray_GETPTR1(m_curArray, dest_idx), src_item); + Py_XDECREF(src_item); + } + dest_idx++; + } + PyArray_Resize(m_curArray, &dims, true, NPY_CORDER); + PyGILState_Release(gil); +} + +void PythonPointTable::reset() +{ + point_count_t np = 0; + for (PointId idx = 0; idx < numPoints(); idx++) + if (!skip(idx)) + np++; + + if (np && np != capacity()) + py_resizeArray(np); + + // This will keep putting arrays on the list until done, whether or not the consumer + // can handle them that fast. We can modify as appropriate to block if desired. + std::unique_lock l(m_mutex); + { + // It's possible that this is called with 0 points processed, in which case + // we don't push the current array. + if (np) + { + m_arrays.push(m_curArray); + py_createArray(); + m_producedCv.notify_one(); + } + while (m_arrays.size() > m_prefetch) + m_consumedCv.wait(l); + } +} + +void PythonPointTable::disable() +{ + // TODO: uncomment the next line when/if StreamPointTable.m_capacity + // changes from private to protected + // m_capacity = 0; +} + +void PythonPointTable::done() +{ + m_arrays.push(nullptr); + m_producedCv.notify_one(); +} + +PyArrayObject *PythonPointTable::fetchArray() +{ + PyArrayObject *arr = nullptr; + + // Lock scope. + Py_BEGIN_ALLOW_THREADS + { + std::unique_lock l(m_mutex); + while (m_arrays.empty()) + m_producedCv.wait(l); + + // Grab the array from the front of the list and notify that we did so. + arr = m_arrays.front(); + m_arrays.pop(); + } + Py_END_ALLOW_THREADS + // Notify that we consumed an array. 
+ m_consumedCv.notify_one(); + return arr; +} + +char *PythonPointTable::getPoint(PointId idx) +{ + return (char *)PyArray_GETPTR1(m_curArray, idx); +} + + +// StreamableExecutor + +StreamableExecutor::StreamableExecutor(std::string const& json, + std::vector> arrays, + int level, + point_count_t chunkSize, + int prefetch, + pdal::StringList allowedDims) + : PipelineExecutor(json, arrays, level) + , m_table(chunkSize, prefetch) + , m_exc(nullptr) +{ + + if (allowedDims.size()) + { + m_table.layout()->setAllowedDims(allowedDims); + } + m_thread.reset(new std::thread([this]() + { + try { + m_manager.executeStream(m_table); + } catch (...) { + m_exc = std::current_exception(); + } + m_table.done(); + })); +} + +StreamableExecutor::~StreamableExecutor() +{ + if (!m_executed) + { + m_table.disable(); + auto gil = PyGILState_Ensure(); + while (PyArrayObject* arr = m_table.fetchArray()) + Py_XDECREF(arr); + PyGILState_Release(gil); + } + Py_BEGIN_ALLOW_THREADS + m_thread->join(); + Py_END_ALLOW_THREADS +} + +PyArrayObject *StreamableExecutor::executeNext() +{ + PyArrayObject* arr = nullptr; + if (!m_executed) + { + arr = m_table.fetchArray(); + if (arr == nullptr) + m_executed = true; + if (m_exc) + std::rethrow_exception(m_exc); + } + return arr; +} + +} // namespace python +} // namespace pdal diff --git a/pdal/PyPipeline.hpp b/src/pdal/StreamableExecutor.hpp similarity index 57% rename from pdal/PyPipeline.hpp rename to src/pdal/StreamableExecutor.hpp index a1ddc30b..f565c8ee 100644 --- a/pdal/PyPipeline.hpp +++ b/src/pdal/StreamableExecutor.hpp @@ -34,62 +34,66 @@ #pragma once -#include -#include -#include -#include +#include +#include -#include -#include -#include +#include "PyPipeline.hpp" namespace pdal { namespace python { -class Array; - -class python_error : public std::runtime_error +class PythonPointTable : public StreamPointTable { public: - inline python_error(std::string const& msg) : std::runtime_error(msg) - {} + PythonPointTable(point_count_t size, int 
prefetch); + ~PythonPointTable(); + + virtual void finalize(); + void disable(); + void done(); + PyArrayObject *fetchArray(); + +protected: + virtual void reset(); + virtual char *getPoint(PointId idx); + +private: + // All functions starting with py_ call Python things that need the GIL locked. + void py_createArray(); + void py_resizeArray(point_count_t np); + + int m_prefetch; + PointLayout m_layout; + PyArrayObject *m_curArray; + PyArray_Descr *m_dtype; + std::mutex m_mutex; + std::condition_variable m_producedCv; + std::condition_variable m_consumedCv; + std::queue m_arrays; }; -class Pipeline +class StreamableExecutor : public PipelineExecutor { public: - Pipeline(std::string const& json); - Pipeline(std::string const& json, - std::vector arrays); - ~Pipeline(); + StreamableExecutor(std::string const& json, + std::vector> arrays, + int level, + point_count_t chunkSize, + int prefetch, + pdal::StringList allowedDim); + ~StreamableExecutor(); - int64_t execute(); - bool validate(); - inline std::string getPipeline() const - { - return m_executor->getPipeline(); - } - inline std::string getMetadata() const - { - return m_executor->getMetadata(); - } - inline std::string getSchema() const - { - return m_executor->getSchema(); - } - inline std::string getLog() const - { - return m_executor->getLog(); - } - std::vector getArrays() const; - - void setLogLevel(int level); - int getLogLevel() const; + MetadataNode getMetadata() { return m_table.metadata(); } + PyArrayObject* executeNext(); private: - std::shared_ptr m_executor; + ConstPointTableRef pointTable() const { return m_table; } + + PythonPointTable m_table; + std::unique_ptr m_thread; + std::exception_ptr m_exc; }; } // namespace python diff --git a/src/pdal/__init__.py b/src/pdal/__init__.py new file mode 100644 index 00000000..c67ab5e3 --- /dev/null +++ b/src/pdal/__init__.py @@ -0,0 +1,12 @@ +__all__ = ["Pipeline", "Stage", "Reader", "Filter", "Writer", "dimensions", "info"] +__version__ = '3.5.3' + +from 
. import libpdalpython +from .drivers import inject_pdal_drivers +from .pipeline import Filter, Pipeline, Reader, Stage, Writer + +inject_pdal_drivers() +dimensions = libpdalpython.getDimensions() +info = libpdalpython.getInfo() + +del inject_pdal_drivers, libpdalpython diff --git a/src/pdal/__main__.py b/src/pdal/__main__.py new file mode 100644 index 00000000..4569e522 --- /dev/null +++ b/src/pdal/__main__.py @@ -0,0 +1,79 @@ +import sys +import os +import pathlib + +import sysconfig + +import argparse + +import pdal + +from . import __version__ + +__all__ = ["main"] + + +def __dir__() -> list[str]: + return __all__ + + +def print_driver_path(args): + if 'PDAL_DRIVER_PATH' in os.environ: + print (os.environ['PDAL_DRIVER_PATH']) + +def print_plugin_path(args): + purelib = sysconfig.get_paths()["purelib"] + + if sys.platform == "linux" or sys.platform == "linux2": + suffix = 'so' + purelib = purelib + os.path.sep + "pdal" + elif sys.platform == "darwin": + suffix = 'dylib' + purelib = purelib + os.path.sep + "pdal" + elif sys.platform == "win32": + suffix = 'dll' + purelib = purelib + os.path.sep + "bin" + + for f in pathlib.Path(purelib).glob(f'*.{suffix}'): + if 'pdal' in str(f.name): + if 'numpy' in str(f.name) or 'python' in str(f.name): + print (purelib) + return # we are done + +def print_version(args): + info = pdal.drivers.libpdalpython.getInfo() + pdal_version = info.version + plugin = info.plugin + debug = info.debug + + line = '----------------------------------------------------------------------------------------------------------------------------\n' + version = f'PDAL version {pdal_version}\nPython bindings version {__version__}\n' + driver_path = 'PDAL_DRIVER_PATH not set!' 
+ if 'PDAL_DRIVER_PATH' in os.environ: + driver_path = os.environ['PDAL_DRIVER_PATH'] + plugin = f"Environment-set PDAL_DRIVER_PATH: {driver_path}" + output = f'{line}{version}{plugin}\n{line}\n{debug}' + print (output) + + +def main() -> None: + header = f"PDAL Python bindings {__version__} on Python {sys.version}" + + parser = argparse.ArgumentParser(description=header) + parser.add_argument('--pdal-driver-path', action='store_true', + help='print PDAL_DRIVER_PATH including Python plugin locations') + parser.add_argument('--pdal-plugin-path', action='store_true', + help='print location of PDAL Python plugins') + + args = parser.parse_args() + + if args.pdal_driver_path: + print_driver_path(args) + elif args.pdal_plugin_path: + print_plugin_path(args) + else: + print_version(args) + + +if __name__ == "__main__": + main() diff --git a/src/pdal/drivers.py b/src/pdal/drivers.py new file mode 100644 index 00000000..78d3dbb7 --- /dev/null +++ b/src/pdal/drivers.py @@ -0,0 +1,84 @@ +import json +import subprocess +from dataclasses import dataclass, field +from typing import Callable, ClassVar, FrozenSet, Mapping, Optional, Sequence, Type + +from .pipeline import Filter, Reader, Stage, Writer +from . 
import libpdalpython + +import shlex + +StreamableTypes: FrozenSet + + +@dataclass +class Option: + name: str + description: str + default: Optional[str] = None + + def __repr__(self) -> str: + if self.default is not None: + return f"{self.name}={self.default!r}: {self.description}" + else: + return f"{self.name}: {self.description}" + + +@dataclass +class Driver: + name: str + short_name: str = field(init=False) + type: Type[Stage] = field(init=False) + description: str + options: Sequence[Option] + + def __post_init__(self) -> None: + prefix, _, suffix = self.name.partition(".") + self.type = self._prefix_to_type[prefix] + self.short_name = suffix + + @property + def factory(self) -> Callable[..., Stage]: + if self.options and self.options[0].name == "filename": + factory = lambda filename, **kwargs: self.type( + filename=filename, type=self.name, **kwargs + ) + else: + factory = lambda **kwargs: self.type(type=self.name, **kwargs) + factory.__name__ = self.short_name + factory.__qualname__ = f"{self.type.__name__}.{self.short_name}" + factory.__module__ = self.type.__module__ + factory.__doc__ = self.description + if self.options: + factory.__doc__ += "\n\n" + factory.__doc__ += "\n".join(map(repr, self.options)) + return factory + + _prefix_to_type: ClassVar[Mapping[str, Type[Stage]]] = { + "readers": Reader, + "filters": Filter, + "writers": Writer, + } + + +def inject_pdal_drivers() -> None: + + drivers = libpdalpython.getDrivers() + options = libpdalpython.getOptions() + + streamable = [] + for d in drivers: + name = d["name"] + d_options = [Option(**option_dict) for option_dict in (options.get(name) or ())] + # move filename option first + try: + i = next(i for i, opt in enumerate(d_options) if opt.name == "filename") + d_options.insert(0, d_options.pop(i)) + except StopIteration: + pass + driver = Driver(name, d["description"], d_options) + setattr(driver.type, driver.short_name, staticmethod(driver.factory)) + if d["streamable"]: + 
streamable.append(driver.name) + global StreamableTypes + StreamableTypes = frozenset(streamable) diff --git a/src/pdal/export.hpp b/src/pdal/export.hpp new file mode 100644 index 00000000..5a6c9aea --- /dev/null +++ b/src/pdal/export.hpp @@ -0,0 +1,44 @@ +/****************************************************************************** +* Copyright (c) 2025, Hobu Inc. (info@hobu.co) +* +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following +* conditions are met: +* +* * Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* * Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in +* the documentation and/or other materials provided +* with the distribution. +* * Neither the name of Hobu, Inc. nor the +* names of its contributors may be used to endorse or promote +* products derived from this software without specific prior +* written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS +* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +* OF SUCH DAMAGE. 
+****************************************************************************/ + + +#include + +#ifndef PDAL_EXPORT +# define PDAL_EXPORT PDAL_DLL +#endif + +#ifndef PDAL_DLL +# define PDAL_DLL PDAL_EXPORT +#endif diff --git a/src/pdal/libpdalpython.cpp b/src/pdal/libpdalpython.cpp new file mode 100644 index 00000000..09118fbf --- /dev/null +++ b/src/pdal/libpdalpython.cpp @@ -0,0 +1,353 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +#define NPY_TARGET_VERSION NPY_1_22_API_VERSION +#define NPY_NO_DEPRECATED_API NPY_1_22_API_VERSION + +#define PY_ARRAY_UNIQUE_SYMBOL PDAL_ARRAY_API + +#include + +#include "PyArray.hpp" +#include "PyDimension.hpp" +#include "PyPipeline.hpp" +#include "StreamableExecutor.hpp" + +namespace py = pybind11; + +namespace pdal { + using namespace py::literals; + + py::object getInfo() { + return py::module_::import("types").attr("SimpleNamespace")( + "version"_a = pdal::Config::versionString(), + "major"_a = pdal::Config::versionMajor(), + "minor"_a = pdal::Config::versionMinor(), + "patch"_a = pdal::Config::versionPatch(), + "debug"_a = pdal::Config::debugInformation(), + "sha1"_a = pdal::Config::sha1(), + "plugin"_a = pdal::Config::pluginInstallPath() + ); + }; + + std::vector getDrivers() { + std::vector drivers; + + pdal::StageFactory f(false); + pdal::PluginManager::loadAll(); + pdal::StringList stages = pdal::PluginManager::names(); + + pdal::StageExtensions& extensions = pdal::PluginManager::extensions(); + for (auto name : stages) + { + pdal::Stage *s = f.createStage(name); + std::string description = pdal::PluginManager::description(name); + std::string link = pdal::PluginManager::link(name); + std::vector extension_names = extensions.extensions(name); + + py::dict d( + "name"_a=name, + "description"_a=description, + "streamable"_a=s->pipelineStreamable(), + "extensions"_a=extension_names + ); + f.destroyStage(s); + drivers.push_back(std::move(d)); + } + return drivers; + + }; + + py::object 
getOptions() { + py::object json = py::module_::import("json"); + py::dict stageOptions; + + pdal::StageFactory f; + pdal::PluginManager::loadAll(); + pdal::StringList stages = pdal::PluginManager::names(); + + for (auto name : stages) + { + pdal::Stage *s = f.createStage(name); + pdal::ProgramArgs args; + s->addAllArgs(args); + std::ostringstream ostr; + args.dump3(ostr); + py::str pystring(ostr.str()); + pystring.attr("strip"); + + py::object j; + + try { + j = json.attr("loads")(pystring); + } catch (py::error_already_set &e) { + std::cerr << "failed:" << name << "'" << ostr.str() << "'" < getDimensions() { + py::object np = py::module_::import("numpy"); + py::object dtype = np.attr("dtype"); + std::vector dims; + for (const auto& dim: getValidDimensions()) + { + py::dict d( + "name"_a=dim.name, + "description"_a=dim.description, + "dtype"_a=dtype(dim.type + std::to_string(dim.size)) + ); + dims.push_back(std::move(d)); + } + return dims; + }; + + std::string getReaderDriver(std::filesystem::path const& p) + { + return StageFactory::inferReaderDriver(p.string()); + } + + std::string getWriterDriver(std::filesystem::path const& p) + { + return StageFactory::inferWriterDriver(p.string()); + } + + using pdal::python::PipelineExecutor; + using pdal::python::StreamableExecutor; + + class PipelineIterator : public StreamableExecutor { + public: + using StreamableExecutor::StreamableExecutor; + + py::object getSchema() { + return py::module_::import("json").attr("loads")(StreamableExecutor::getSchema()); + } + + py::array executeNext() { + PyArrayObject* arr(StreamableExecutor::executeNext()); + if (!arr) + throw py::stop_iteration(); + + return py::reinterpret_steal((PyObject*)arr); + } + + py::object getMetadata() { + py::object json = py::module_::import("json"); + + std::stringstream strm; + MetadataNode root = (StreamableExecutor::getMetadata()).clone("metadata"); + pdal::Utils::toJSON(root, strm); + + + py::bytes pybytes(strm.str()); + py::str pystring ( 
pybytes.attr("decode")("utf-8", "ignore")); + + py::object j; + j = json.attr("loads")(pystring); + + return j; + + } + + }; + + class Pipeline { + public: + point_count_t execute(pdal::StringList allowedDims) { + point_count_t response(0); + { + py::gil_scoped_release release; + response = getExecutor()->execute(allowedDims); + } + return response; + } + + point_count_t executeStream(point_count_t streamLimit, pdal::StringList allowedDims) { + point_count_t response(0); + { + py::gil_scoped_release release; + response = getExecutor()->executeStream(streamLimit, allowedDims); + } + return response; + } + + std::unique_ptr iterator(int chunk_size, int prefetch, pdal::StringList allowedDims) { + return std::unique_ptr(new PipelineIterator( + getJson(), _inputs, _loglevel, chunk_size, prefetch, allowedDims + )); + } + + void setInputs(const std::vector& inputs) { + _inputs.clear(); + for (const auto& input_obj: inputs) { + if (py::isinstance(input_obj)) { + // Backward compatibility for accepting list of numpy arrays + auto ndarray = input_obj.cast(); + _inputs.push_back(std::make_shared((PyArrayObject*)ndarray.ptr())); + } else { + // Now expected to be a list of pairs: (numpy array, stream handler) + auto input = input_obj.cast>(); + _inputs.push_back(std::make_shared( + (PyArrayObject*)input.first.ptr(), + input.second ? 
+ std::make_shared(input.second) + : nullptr)); + } + } + delExecutor(); + } + + int getLoglevel() { return _loglevel; } + + void setLogLevel(int level) { _loglevel = level; delExecutor(); } + + std::string getLog() { return getExecutor()->getLog(); } + + std::string getPipeline() { return getExecutor()->getPipeline(); } + std::string getSrsWKT2() { return getExecutor()->getSrsWKT2(); } + + py::object getQuickInfo() { + py::object json = py::module_::import("json"); + + std::string response; + { + py::gil_scoped_release release; + response = getExecutor()->getQuickInfo(); + } + py::bytes pybytes(response); + + py::str pystring ( pybytes.attr("decode")("utf-8", "ignore")); + pystring.attr("strip"); + + py::object j; + j = json.attr("loads")(pystring); + + return j; + + } + + py::object getMetadata() { + py::object json = py::module_::import("json"); + + py::bytes pybytes(getExecutor()->getMetadata()); + py::str pystring ( pybytes.attr("decode")("utf-8", "ignore")); + + py::object j; + j = json.attr("loads")(pystring); + + return j; + + } + + py::object getSchema() { + return py::module_::import("json").attr("loads")(getExecutor()->getSchema()); + } + + std::vector getArrays() { + std::vector output; + for (const auto &view: getExecutor()->views()) { + PyArrayObject* arr(pdal::python::viewToNumpyArray(view)); + output.push_back(py::reinterpret_steal((PyObject*)arr)); + } + return output; + } + + std::vector getMeshes() { + std::vector output; + for (const auto &view: getExecutor()->views()) { + PyArrayObject* arr(pdal::python::meshToNumpyArray(view->mesh())); + output.push_back(py::reinterpret_steal((PyObject*)arr)); + } + return output; + } + + std::string getJson() const { + PYBIND11_OVERRIDE_PURE_NAME(std::string, Pipeline, "toJSON", getJson); + } + + bool hasInputs() { return !_inputs.empty(); } + + void copyInputs(const Pipeline& other) { _inputs = other._inputs; } + + void delExecutor() { _executor.reset(); } + + PipelineExecutor* getExecutor() { + // We need 
to acquire the GIL before we create the executor + // because this method does Python init stuff but pybind11 doesn't + // automatically encapsulate it with a gil_scoped_acquire like it + // does for all of the other methods it knows about + py::gil_scoped_acquire acquire; + if (!_executor) + _executor.reset(new PipelineExecutor(getJson(), _inputs, _loglevel)); + return _executor.get(); + } + + private: + std::unique_ptr _executor; + std::vector> _inputs; + int _loglevel; + }; + + + + PYBIND11_MODULE(libpdalpython, m) + { + _import_array(); + + py::class_(m, "PipelineIterator") + .def("__iter__", [](PipelineIterator &it) -> PipelineIterator& { return it; }) + .def("__next__", &PipelineIterator::executeNext) + .def_property_readonly("log", &PipelineIterator::getLog) + .def_property_readonly("schema", &PipelineIterator::getSchema) + .def_property_readonly("srswkt2", &PipelineIterator::getSrsWKT2) + .def_property_readonly("pipeline", &PipelineIterator::getPipeline) + .def_property_readonly("metadata", &PipelineIterator::getMetadata); + + + py::class_(m, "Pipeline") + .def(py::init<>()) + .def("execute", &Pipeline::execute, py::arg("allowed_dims") =py::list()) + .def("execute_streaming", &Pipeline::executeStream, "chunk_size"_a=10000, py::arg("allowed_dims") =py::list()) + .def("iterator", &Pipeline::iterator, "chunk_size"_a=10000, "prefetch"_a=0, py::arg("allowed_dims") =py::list()) + .def_property("inputs", nullptr, &Pipeline::setInputs) + .def_property("loglevel", &Pipeline::getLoglevel, &Pipeline::setLogLevel) + .def_property_readonly("log", &Pipeline::getLog) + .def_property_readonly("schema", &Pipeline::getSchema) + .def_property_readonly("srswkt2", &Pipeline::getSrsWKT2) + .def_property_readonly("pipeline", &Pipeline::getPipeline) + .def_property_readonly("quickinfo", &Pipeline::getQuickInfo) + .def_property_readonly("metadata", &Pipeline::getMetadata) + .def_property_readonly("arrays", &Pipeline::getArrays) + .def_property_readonly("meshes", 
&Pipeline::getMeshes) + .def_property_readonly("_has_inputs", &Pipeline::hasInputs) + .def("_copy_inputs", &Pipeline::copyInputs) + .def("toJSON", &Pipeline::getJson) + .def("_del_executor", &Pipeline::delExecutor); + m.def("getInfo", &getInfo); + m.def("getDrivers", &getDrivers); + m.def("getOptions", &getOptions); + m.def("getDimensions", &getDimensions); + m.def("infer_reader_driver", &getReaderDriver); + m.def("infer_writer_driver", &getWriterDriver); + + if (pdal::Config::versionMajor() < 2) + throw pybind11::import_error("PDAL version must be >= 2.7"); + + if (pdal::Config::versionMajor() == 2 && pdal::Config::versionMinor() < 7) + throw pybind11::import_error("PDAL version must be >= 2.7"); + }; + +}; // namespace pdal diff --git a/src/pdal/pipeline.py b/src/pdal/pipeline.py new file mode 100644 index 00000000..60a181c0 --- /dev/null +++ b/src/pdal/pipeline.py @@ -0,0 +1,300 @@ +from __future__ import annotations + +import json +import logging +from typing import Any, Container, Dict, Iterator, List, Optional, Sequence, Union, cast, Callable + +import numpy as np +import pathlib + +try: + from meshio import Mesh +except ModuleNotFoundError: # pragma: no cover + Mesh = None + +try: + from pandas import DataFrame +except ModuleNotFoundError: # pragma: no cover + DataFrame = None + +try: + from geopandas import GeoDataFrame, points_from_xy +except ModuleNotFoundError: # pragma: no cover + GeoDataFrame = points_from_xy = None + +from . 
import drivers, libpdalpython + +LogLevelToPDAL = { + logging.ERROR: 0, + logging.WARNING: 1, + logging.INFO: 2, + logging.DEBUG: 8, # pdal::LogLevel::Debug5 +} +LogLevelFromPDAL = {v: k for k, v in LogLevelToPDAL.items()} + + +class Pipeline(libpdalpython.Pipeline): + def __init__( + self, + spec: Union[None, str, Sequence[Stage]] = None, + arrays: Sequence[np.ndarray] = (), + loglevel: int = logging.ERROR, + json: Optional[str] = None, + dataframes: Sequence[DataFrame] = (), + stream_handlers: Sequence[Callable[[], int]] = (), + ): + + if json: + if spec and json: + raise ValueError("provide 'spec' or 'json' arguments, not both") + spec = json + + # Convert our data frames to Numpy Structured Arrays + if dataframes: + arrays = [df.to_records() if not "geometry" in df.columns else df.drop(columns=["geometry"]).to_records() for df in dataframes] + + super().__init__() + self._stages: List[Stage] = [] + if spec: + stages = _parse_stages(spec) if isinstance(spec, str) else spec + for stage in stages: + self |= stage + + if stream_handlers: + if len(stream_handlers) != len(arrays): + raise RuntimeError("stream_handlers must match the number of specified input arrays / dataframes") + self.inputs = [(a, h) for a, h in zip(arrays, stream_handlers)] + else: + self.inputs = [(a, None) for a in arrays] + + self.loglevel = loglevel + + def __getstate__(self): + state = self.pipeline + return state + + def __setstate__(self, state): + self.__init__(state) + + @property + def stages(self) -> List[Stage]: + return list(self._stages) + + @property + def streamable(self) -> bool: + return all(stage.streamable for stage in self._stages) + + @property + def loglevel(self) -> int: + return LogLevelFromPDAL[super().loglevel] + + @loglevel.setter + def loglevel(self, value: int) -> None: + try: + loglevel = LogLevelToPDAL[value] + except KeyError: + raise ValueError(f"Invalid level {value!r}") + # super() property setter is not supported + libpdalpython.Pipeline.loglevel.__set__(self, 
loglevel) + + def __ior__(self, other: Union[Stage, Pipeline]) -> Pipeline: + if isinstance(other, Stage): + self._stages.append(other) + elif isinstance(other, Pipeline): + if self._stages and other._has_inputs: + raise ValueError( + "A pipeline with inputs cannot follow another pipeline" + ) + self._stages.extend(other._stages) + else: + raise TypeError(f"Expected Stage or Pipeline, not {other}") + self._del_executor() + return self + + def __or__(self, other: Union[Stage, Pipeline]) -> Pipeline: + new = self.__copy__() + new |= other + return new + + def __copy__(self) -> Pipeline: + clone = self.__class__(loglevel=self.loglevel) + clone._copy_inputs(self) + clone |= self + return clone + + def get_meshio(self, idx: int) -> Optional[Mesh]: + if Mesh is None: # pragma: no cover + raise RuntimeError( + "The get_meshio function can only be used if you have installed meshio. " + "Try pip install meshio" + ) + array = self.arrays[idx] + mesh = self.meshes[idx] + if len(mesh) == 0: + return None + return Mesh( + np.stack((array["X"], array["Y"], array["Z"]), 1), + [("triangle", np.stack((mesh["A"], mesh["B"], mesh["C"]), 1))], + ) + + def get_dataframe(self, idx: int) -> Optional[DataFrame]: + if DataFrame is None: + raise RuntimeError("Pandas support requires Pandas to be installed") + + return DataFrame(self.arrays[idx]) + + def get_geodataframe(self, idx: int, xyz: bool=False, crs: Any=None) -> Optional[GeoDataFrame]: + if GeoDataFrame is None: + raise RuntimeError("GeoPandas support requires GeoPandas to be installed") + df = DataFrame(self.arrays[idx]) + coords = [df["X"], df["Y"], df["Z"]] if xyz else [df["X"], df["Y"]] + geometry = points_from_xy(*coords) + gdf = GeoDataFrame( + df, + geometry=geometry, + crs=crs, + ) + df = coords = geometry = None + return gdf + + def _get_json(self) -> str: + return self.toJSON() + + def toJSON(self) -> str: + options_list = [] + stage2tag: Dict[Stage, str] = {} + stages = self._stages + if all(isinstance(stage, Reader) for 
stage in stages): + stages = [*stages, Filter.merge()] + for stage in stages: + stage2tag[stage] = stage.tag or _generate_tag(stage, stage2tag.values()) + options = stage.options + for option in options: + if isinstance(options[option], pathlib.Path): + options[option] = str(options[option]) + options["tag"] = stage2tag[stage] + options["type"] = stage.type + inputs = _get_input_tags(stage, stage2tag) + if inputs: + options["inputs"] = inputs + options_list.append(options) + + return json.dumps(options_list) + + +class Stage: + def __init__(self, **options: Any): + self._options = options + + @property + def type(self) -> str: + return cast(str, self._options["type"]) + + @property + def streamable(self) -> bool: + return self.type in drivers.StreamableTypes + + @property + def tag(self) -> Optional[str]: + return self._options.get("tag") + + @property + def inputs(self) -> List[Union[Stage, str]]: + inputs = self._options.get("inputs", ()) + return [inputs] if isinstance(inputs, (Stage, str)) else list(inputs) + + @property + def options(self) -> Dict[str, Any]: + return dict(self._options) + + def pipeline(self, *arrays: np.ndarray, loglevel: int = logging.ERROR) -> Pipeline: + return Pipeline((self,), arrays, loglevel) + + def __or__(self, other: Union[Stage, Pipeline]) -> Pipeline: + return Pipeline((self, other)) + + +class InferableTypeStage(Stage): + def __init__(self, filename: Optional[str] = None, **options: Any): + if filename: + options["filename"] = filename + super().__init__(**options) + + @property + def type(self) -> str: + try: + return super().type + except KeyError: + filename = self._options.get("filename") + return str(self._infer_type(filename) if filename else "") + + _infer_type = staticmethod(lambda filename: "") + + +class Reader(InferableTypeStage): + _infer_type = staticmethod(libpdalpython.infer_reader_driver) + + +class Filter(Stage): + def __init__(self, type: str, **options: Any): + super().__init__(type=type, **options) + + +class 
Writer(InferableTypeStage): + _infer_type = staticmethod(libpdalpython.infer_writer_driver) + + +def _parse_stages(text: str) -> Iterator[Stage]: + json_stages = json.loads(text) + if isinstance(json_stages, dict): + json_stages = json_stages.get("pipeline") + if not isinstance(json_stages, list): + raise ValueError("root element is not a pipeline") + + last = len(json_stages) - 1 + for i, options in enumerate(json_stages): + if not isinstance(options, dict): + if isinstance(options, str): + options = {"filename": options} + else: + raise ValueError("A stage element must be string or dict") + + stage_type = options.get("type") + if stage_type: + is_reader = stage_type.startswith("readers.") + else: + # The type is inferred from a filename as a reader if it's not + # the last stage or if there's only one. + is_reader = i == 0 or i != last + + if is_reader: + yield Reader(**options) + elif not stage_type or stage_type.startswith("writers."): + yield Writer(**options) + else: + yield Filter(**options) + + +def _generate_tag(stage: Stage, tags: Container[str]) -> str: + tag_prefix = stage.type.replace(".", "_") + i = 1 + while True: + tag = tag_prefix + str(i) + if tag not in tags: + return tag + i += 1 + + +def _get_input_tags(stage: Stage, stage2tag: Dict[Stage, str]) -> List[str]: + tags = [] + for input in stage.inputs: + if isinstance(input, Stage): + try: + tags.append(stage2tag[input]) + except KeyError: + raise RuntimeError( + f"Invalid pipeline: Undefined stage " f"{input.tag or input.type!r}" + ) + else: + tags.append(input) + return tags diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index 8a3c8545..00000000 --- a/test/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -import sys -DATADIRECTORY = sys.argv.pop() -from test.test_pipeline import test_suite diff --git a/test/data/bad.json b/test/data/bad.json new file mode 100644 index 00000000..13ee85cb --- /dev/null +++ b/test/data/bad.json @@ -0,0 +1,9 @@ +{ + "pipeline": [ + "nofile.las", 
+ { + "type": "filters.sort", + "dimension": "X" + } + ] +} diff --git a/test/data/bad.py b/test/data/bad.py new file mode 100644 index 00000000..dfb3ec15 --- /dev/null +++ b/test/data/bad.py @@ -0,0 +1 @@ +Reader("nofile.las") | Filter.sort(dimension="X") diff --git a/test/data/chip.py b/test/data/chip.py new file mode 100644 index 00000000..6b229586 --- /dev/null +++ b/test/data/chip.py @@ -0,0 +1 @@ +Reader("test/data/autzen-utm.las") | Filter.chipper(capacity=25) | Writer("auzen-utm-chipped-25.las") diff --git a/test/data/mesh.json b/test/data/mesh.json new file mode 100644 index 00000000..e44d9356 --- /dev/null +++ b/test/data/mesh.json @@ -0,0 +1,5 @@ +[ + "test/data/1.2-with-color.las", + {"type": "filters.splitter", "length": 1000}, + {"type": "filters.delaunay"} +] \ No newline at end of file diff --git a/test/data/mesh.py b/test/data/mesh.py new file mode 100644 index 00000000..635feea1 --- /dev/null +++ b/test/data/mesh.py @@ -0,0 +1 @@ +Reader("test/data/1.2-with-color.las") | Filter.splitter(length=1000) | Filter.delaunay() diff --git a/test/data/perlin.npy b/test/data/perlin.npy deleted file mode 100644 index 457a1356..00000000 Binary files a/test/data/perlin.npy and /dev/null differ diff --git a/test/data/range.json b/test/data/range.json new file mode 100644 index 00000000..df48ac09 --- /dev/null +++ b/test/data/range.json @@ -0,0 +1,7 @@ +[ + "test/data/autzen-utm.las", + { + "type": "filters.range", + "limits": "Intensity[80:120)" + } +] diff --git a/test/data/range.py b/test/data/range.py new file mode 100644 index 00000000..03a8aa26 --- /dev/null +++ b/test/data/range.py @@ -0,0 +1 @@ +Reader("test/data/autzen-utm.las") | Filter.range(limits="Intensity[80:120)") diff --git a/test/data/reproject.json b/test/data/reproject.json index 87cdc1b3..d0bd3ebd 100644 --- a/test/data/reproject.json +++ b/test/data/reproject.json @@ -7,7 +7,7 @@ { "type":"filters.python", "function":"filter", - "source":"import numpy as np\n\ndef filter(ins,outs):\n\tcls = 
ins['Classification']\n\n\tkeep_classes = [1]\n\n\t# Use the first test for our base array.\n\tkeep = np.equal(cls, keep_classes[0])\n\n\t# For 1:n, test each predicate and join back\n\t# to our existing predicate array\n\tfor k in range(1,len(keep_classes)):\n\t\tt = np.equal(cls, keep_classes[k])\n\t\tkeep = keep + t\n\n\touts['Mask'] = keep\n\treturn True", + "source":"import numpy as np\n\ndef filter(ins,outs):\n\tprint('entered filter()')\n\tcls = ins['Classification']\n\n\tkeep_classes = [1]\n\n\t# Use the first test for our base array.\n\tkeep = np.equal(cls, keep_classes[0])\n\n\t# For 1:n, test each predicate and join back\n\t# to our existing predicate array\n\tfor k in range(1,len(keep_classes)):\n\t\tt = np.equal(cls, keep_classes[k])\n\t\tkeep = keep + t\n\n\touts['Mask'] = keep\n\tprint('exiting filter()')\n\treturn True", "module":"anything" }, "out2.las" diff --git a/test/data/reproject.py b/test/data/reproject.py new file mode 100644 index 00000000..e74514c0 --- /dev/null +++ b/test/data/reproject.py @@ -0,0 +1,28 @@ +( + Reader(filename="test/data/1.2-with-color.las", spatialreference="EPSG:2993") + | + Filter.python(function="filter", module="anything", source=""" +import numpy as np + + +def filter(ins, outs): + print("entered filter()") + cls = ins["Classification"] + keep_classes = [1] + + # Use the first test for our base array. 
+ keep = np.equal(cls, keep_classes[0]) + + # For 1:n, test each predicate and join back + # to our existing predicate array + for k in range(1, len(keep_classes)): + t = np.equal(cls, keep_classes[k]) + keep = keep + t + + outs["Mask"] = keep + print("exiting filter()") + return True +""") + | + Writer("out2.las") +) diff --git a/test/data/simple.laz b/test/data/simple.laz new file mode 100644 index 00000000..6f774c5b Binary files /dev/null and b/test/data/simple.laz differ diff --git a/test/data/sort.py b/test/data/sort.py new file mode 100644 index 00000000..8bc741a1 --- /dev/null +++ b/test/data/sort.py @@ -0,0 +1 @@ +Reader("test/data/1.2-with-color.las") | Filter.sort(dimension="X") diff --git a/test/test_pio.py b/test/test_pio.py deleted file mode 100644 index bcde6bcb..00000000 --- a/test/test_pio.py +++ /dev/null @@ -1,57 +0,0 @@ -import unittest -import json - -from pdal import pio - -dummy_pipeline = """{ - "pipeline": [ - { - "type": "readers.ply", - "filename": "dummyinput.ply" - }, - { - "type": "filters.outlier", - "method": "statistical", - "mean_k": 16, - "multiplier": 1.0 - }, - { - "type": "filters.range", - "limits": "Classification![7:7]" - }, - { - "type": "filters.normal" - }, - { - "type": "writers.ply", - "storage_mode": "ascii", - "precision": 4, - "filename": "dummyoutput.ply", - "dims": "X,Y,Z,Red,Green,Blue,NormalX,NormalY,NormalZ" - } - ] -}""" - - - -class TestPIOBasics(unittest.TestCase): - def test_pipeline_construction(self): - pipeline = (pio.readers.ply(filename="dummyinput.ply") + - pio.filters.outlier(method="statistical", mean_k=16, multiplier=1.0) + - pio.filters.range(limits="Classification![7:7]") + - pio.filters.normal() + pio.writers.ply(storage_mode="ascii", precision=4, filename="dummyoutput.ply", - dims="X,Y,Z,Red,Green,Blue,NormalX,NormalY,NormalZ")) - - self.assertIsInstance(pipeline, pio.PipelineSpec) - self.assertEqual(len(list(pipeline.stages)), 5) - self.assertEqual(json.dumps(pipeline.spec, indent=2), 
dummy_pipeline) - - auto_reader = pio.readers.auto(filename="dummyinput.las") - auto_writer = pio.writers.auto(filename="dummyoutput.las") - - self.assertIn("filename", auto_reader.spec) - self.assertNotIn("type", auto_reader.spec) - self.assertIn("filename", auto_reader.spec) - self.assertNotIn("type", auto_writer.spec) - self.assertEqual(auto_reader.prefix, "readers") - self.assertEqual(auto_writer.prefix, "writers") diff --git a/test/test_pipeline.py b/test/test_pipeline.py index e16124d9..46e0a18d 100644 --- a/test/test_pipeline.py +++ b/test/test_pipeline.py @@ -1,176 +1,456 @@ -import unittest -import pdal +import json +import logging import os +import sys + +from typing import List +from itertools import product import numpy as np -from packaging.version import Version +import pytest + +import pdal +import pathlib + +DATADIRECTORY = os.path.join(os.path.dirname(__file__), "data") + -DATADIRECTORY = "./test/data" +def a_filter(ins, outs): + return True -bad_json = u""" -{ - "pipeline": [ - "nofile.las", - { - "type": "filters.sort", - "dimension": "X" - } - ] -} -""" +def compare_structured_arrays(arr1, arr2): + for field in arr1.dtype.names: + equal = np.all(np.equal(arr1[field], arr2[field])) + if not equal: + return False + return True +def get_pipeline(filename): + with open(os.path.join(DATADIRECTORY, filename), "r") as f: + if filename.endswith(".json"): + pipeline = pdal.Pipeline(f.read()) + elif filename.endswith(".py"): + pipeline = eval(f.read(), vars(pdal)) + return pipeline -class PDALTest(unittest.TestCase): - def fetch_json(self, filename): - import os - fn = DATADIRECTORY + os.path.sep + filename - output = '' - with open(fn, 'rb') as f: - output = f.read().decode('UTF-8') - return output +def test_dimensions(): + """Ask PDAL for its valid dimensions list""" + dims = pdal.dimensions + assert len(dims) > 0 -class TestPipeline(PDALTest): - @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')), - "missing test data") - def 
test_construction(self): +class TestPipeline: + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_construction(self, filename): """Can we construct a PDAL pipeline""" - json = self.fetch_json('sort.json') - r = pdal.Pipeline(json) + assert isinstance(get_pipeline(filename), pdal.Pipeline) - @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')), - "missing test data") - def test_execution(self): + # construct Pipeline from a sequence of stages + r = pdal.Reader("r") + f = pdal.Filter("f") + for spec in (r, f), [r, f]: + p = pdal.Pipeline(spec) + assert isinstance(p, pdal.Pipeline) + assert len(p.stages) == 2 + + @pytest.mark.parametrize( + "pipeline", + [ + "{}", + '{"foo": []}', + "[1, 2]", + '{"pipeline": [["a.las", "b.las"], "c.las"]}', + ], + ) + def test_invalid_json(self, pipeline): + """Do we complain with bad pipelines""" + json.loads(pipeline) + with pytest.raises(ValueError): + pdal.Pipeline(pipeline) + + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_execute(self, filename): """Can we execute a PDAL pipeline""" - x = self.fetch_json('sort.json') - r = pdal.Pipeline(x) - r.validate() - r.execute() - self.assertGreater(len(r.pipeline), 200) + r = get_pipeline(filename) + count = r.execute() + assert count == 1065 + + @pytest.mark.parametrize("filename", ["range.json", "range.py"]) + def test_execute_streaming(self, filename): + r = get_pipeline(filename) + assert r.streamable + count = r.execute() + count2 = r.execute_streaming(chunk_size=100) + assert count == count2 + - def test_validate(self): + @pytest.mark.parametrize("filename", ["range.json", "range.py"]) + def test_subsetstreaming(self, filename): + """Can we fetch a subset of PDAL dimensions as a numpy array while streaming""" + r = get_pipeline(filename) + limit = ['X','Y','Z','Intensity'] + arrays = list(r.iterator(chunk_size=100,allowed_dims=limit)) + assert len(arrays) == 11 + assert len(arrays[0].dtype) == 4 + + + 
@pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_execute_streaming_non_streamable(self, filename): + r = get_pipeline(filename) + assert not r.streamable + with pytest.raises(RuntimeError) as info: + r.execute_streaming() + assert "Attempting to use stream mode" in str(info.value) + + @pytest.mark.parametrize("filename", ["bad.json", "bad.py"]) + def test_validate(self, filename): """Do we complain with bad pipelines""" - r = pdal.Pipeline(bad_json) - with self.assertRaises(RuntimeError): - r.validate() + r = get_pipeline(filename) + with pytest.raises(RuntimeError) as info: + r.execute() + if os.name == "nt": + assert "Unable to open stream for" in str(info.value) + else: + assert "No such file or directory" in str(info.value) - @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')), - "missing test data") - def test_array(self): + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_array(self, filename): """Can we fetch PDAL data as a numpy array""" - json = self.fetch_json('sort.json') - r = pdal.Pipeline(json) - r.validate() + r = get_pipeline(filename) r.execute() arrays = r.arrays - self.assertEqual(len(arrays), 1) + assert len(arrays) == 1 a = arrays[0] - self.assertAlmostEqual(a[0][0], 635619.85, 7) - self.assertAlmostEqual(a[1064][2], 456.92, 7) + assert a[0][0] == 635619.85 + assert a[1064][2] == 456.92 + + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_subsetarray(self, filename): + """Can we fetch a subset of PDAL dimensions as a numpy array""" + r = get_pipeline(filename) + limit = ['X','Y','Z'] + r.execute(allowed_dims=limit) + arrays = r.arrays + assert len(arrays) == 1 + assert len(arrays[0].dtype) == 3 + - @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')), - "missing test data") - def test_metadata(self): + + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_metadata(self, filename): """Can we fetch 
PDAL metadata""" - json = self.fetch_json('sort.json') - r = pdal.Pipeline(json) - r.validate() + r = get_pipeline(filename) + with pytest.raises(RuntimeError) as info: + r.metadata + assert "Pipeline has not been executed" in str(info.value) + r.execute() - metadata = r.metadata - import json - j = json.loads(metadata) - self.assertEqual(j["metadata"]["readers.las"][0]["count"], 1065) + assert r.metadata["metadata"]["readers.las"]["count"] == 1065 + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_schema(self, filename): + """Fetching a schema works""" + r = get_pipeline(filename) + with pytest.raises(RuntimeError) as info: + r.schema + assert "Pipeline has not been executed" in str(info.value) + + r.execute() + assert r.schema["schema"]["dimensions"][0]["name"] == "X" - @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')), - "missing test data") - def test_no_execute(self): + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_pipeline(self, filename): + """Can we fetch PDAL pipeline string""" + r = get_pipeline(filename) + r.execute() + # filename might be an object in PDAL 2.9+ + # https://github.com/PDAL/PDAL/issues/4751 + + returned = json.loads(r.pipeline) + expected = { "pipeline": [ + { + "filename": "test/data/1.2-with-color.las", + "tag": "readers_las1", + "type": "readers.las", + }, + { + "dimension": "X", + "inputs": ["readers_las1"], + "tag": "filters_sort1", + "type": "filters.sort", + }, + ] + } + try: + assert returned['pipeline'][0]['filename'] == "test/data/1.2-with-color.las" + except AttributeError: + assert returned['pipeline'][0]['filename']['path'] == "test/data/1.2-with-color.las" + + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_no_execute(self, filename): """Does fetching arrays without executing throw an exception""" - json = self.fetch_json('sort.json') - r = pdal.Pipeline(json) - with self.assertRaises(RuntimeError): + r = 
get_pipeline(filename) + with pytest.raises(RuntimeError) as info: r.arrays -# -# @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'reproject.json')), -# "missing test data") -# def test_logging(self): -# """Can we fetch log output""" -# json = self.fetch_json('reproject.json') -# r = pdal.Pipeline(json) -# r.loglevel = 8 -# r.validate() -# count = r.execute() -# self.assertEqual(count, 789) -# self.assertEqual(r.log.split()[0], '(pypipeline') -# - @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'sort.json')), - "missing test data") - def test_schema(self): - """Fetching a schema works""" - json = self.fetch_json('sort.json') - r = pdal.Pipeline(json) - r.validate() - r.execute() - self.assertEqual(r.schema['schema']['dimensions'][0]['name'], 'X') + assert "Pipeline has not been executed" in str(info.value) - @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'chip.json')), - "missing test data") - def test_merged_arrays(self): - """Can we fetch multiple point views from merged PDAL data """ - json = self.fetch_json('chip.json') - r = pdal.Pipeline(json) - r.validate() + @pytest.mark.parametrize("filename", ["chip.json", "chip.py"]) + def test_merged_arrays(self, filename): + """Can we fetch multiple point views from merged PDAL data""" + r = get_pipeline(filename) r.execute() arrays = r.arrays - self.assertEqual(len(arrays), 43) + assert len(arrays) == 43 + + @pytest.mark.parametrize("filename", ["chip.json", "chip.py"]) + def test_stages(self, filename): + """Can we break up a pipeline as a sequence of stages""" + stages = pdal.Reader("test/data/autzen-utm.las").pipeline().stages + assert len(stages) == 1 + + stages = get_pipeline(filename).stages + assert len(stages) == 3 + + assert isinstance(stages[0], pdal.Reader) + assert stages[0].type == "readers.las" + + assert isinstance(stages[1], pdal.Filter) + assert stages[1].type == "filters.chipper" + + assert isinstance(stages[2], pdal.Writer) + assert stages[2].type == 
"writers.las" + + def test_pipe_stages(self): + """Can we build a pipeline by piping stages together""" + read = pdal.Reader("test/data/autzen-utm.las") + frange = pdal.Filter.range(limits="Intensity[50:200)") + fsplitter = pdal.Filter.splitter(length=1000) + fdelaunay = pdal.Filter.delaunay(inputs=[frange, fsplitter]) + + # pipe stages together + pipeline = read | frange | fsplitter | fdelaunay + pipeline.execute() + + # pipe a pipeline to a stage + pipeline = read | (frange | fsplitter | fdelaunay) + pipeline.execute() + + # pipe a pipeline to a pipeline + pipeline = (read | frange) | (fsplitter | fdelaunay) + pipeline.execute() + + def test_pipe_stage_errors(self): + """Do we complain with piping invalid objects""" + r = pdal.Reader("r", tag="r") + f = pdal.Filter("f") + w = pdal.Writer("w", inputs=["r", f]) + + with pytest.raises(TypeError): + r | (f, w) + with pytest.raises(TypeError): + (r, f) | w + with pytest.raises(TypeError): + (r, f) | (f, w) + + pipeline = r | w + with pytest.raises(RuntimeError) as info: + pipeline.execute() + assert "Undefined stage 'f'" in str(info.value) + def test_inputs(self): + """Can we combine pipelines with inputs""" + data = np.load(os.path.join(DATADIRECTORY, "test3d.npy")) + f = pdal.Filter.splitter(length=1000) + pipeline = f.pipeline(data) + pipeline.execute() -class TestArrayLoad(PDALTest): + # a pipeline with inputs can be followed by stage/pipeline + (pipeline | pdal.Writer.null()).execute() + (pipeline | (f | pdal.Writer.null())).execute() - @unittest.skipUnless(os.path.exists(os.path.join(DATADIRECTORY, 'perlin.npy')), - "missing test data") + # a pipeline with inputs cannot follow another stage/pipeline + with pytest.raises(ValueError): + pdal.Reader("r") | pipeline + with pytest.raises(ValueError): + (pdal.Reader("r") | f) | pipeline + + def test_infer_stage_type(self): + """Can we infer stage type from the filename""" + assert pdal.Reader("foo.las").type == "readers.las" + assert pdal.Writer("foo.las").type == 
"writers.las" + assert pdal.Reader("foo.xxx").type == "" + assert pdal.Writer("foo.xxx").type == "" + assert pdal.Reader().type == "" + assert pdal.Writer().type == "" + + def test_streamable(self): + """Can we distinguish streamable from non-streamable stages and pipeline""" + rs = pdal.Reader(type="readers.las", filename="foo") + assert rs.streamable is True + assert pdal.Reader.las("foo").streamable is True + assert pdal.Reader("foo.las").streamable is True + + rn = pdal.Reader(type="readers.pts", filename="foo") + assert rn.streamable is False + assert pdal.Reader.pts("foo").streamable is False + assert pdal.Reader("foo.pts").streamable is False + + fs = pdal.Filter(type="filters.crop") + assert fs.streamable is True + assert pdal.Filter.crop().streamable is True + + fn = pdal.Filter(type="filters.cluster") + assert fn.streamable is False + assert pdal.Filter.cluster().streamable is False + + ws = pdal.Writer(type="writers.ogr", filename="foo") + assert ws.streamable is True + assert pdal.Writer.ogr(filename="foo").streamable is True + assert pdal.Writer("foo.shp").streamable is True + + wn = pdal.Writer(type="writers.glb", filename="foo") + assert wn.streamable is False + assert pdal.Writer.gltf("foo").streamable is False + assert pdal.Writer("foo.glb").streamable is False + + assert (rs | fs | ws).streamable is True + assert (rn | fs | ws).streamable is False + assert (rs | fn | ws).streamable is False + assert (rs | fs | wn).streamable is False + + @pytest.mark.parametrize("filename", ["chip.json", "chip.py"]) + def test_logging(self, filename): + """Can we fetch log output""" + r = get_pipeline(filename) + assert r.loglevel == logging.ERROR + assert r.log == "" + + for loglevel in logging.CRITICAL, -1: + with pytest.raises(ValueError): + r.loglevel = loglevel + + count = r.execute() + assert count == 1065 + assert r.log == "" + + r.loglevel = logging.DEBUG + assert r.loglevel == logging.DEBUG + count = r.execute() + assert count == 1065 + assert 
"(pypipeline readers.las Debug)" in r.log + assert "(pypipeline Debug) Executing pipeline in standard mode" in r.log + assert "(pypipeline writers.las Debug)" in r.log + + @pytest.mark.skipif( + not hasattr(pdal.Filter, "python"), + reason="filters.python PDAL plugin is not available", + ) + @pytest.mark.parametrize("filename", ["reproject.json", "reproject.py"]) + def test_logging_filters_python(self, filename): + """Can we fetch log output including print() statements from filters.python""" + r = get_pipeline(filename) + assert r.loglevel == logging.ERROR + assert r.log == "" + + for loglevel in logging.CRITICAL, -1: + with pytest.raises(ValueError): + r.loglevel = loglevel + + count = r.execute() + assert count == 789 + assert r.log == "entered filter()\n" + "exiting filter()\n" + + r.loglevel = logging.DEBUG + assert r.loglevel == logging.DEBUG + count = r.execute() + assert count == 789 + assert "(pypipeline readers.las Debug)" in r.log + assert "(pypipeline filters.python Debug)" in r.log + assert "\nentered filter()\n" in r.log + assert "\nexiting filter()\n" in r.log + assert "(pypipeline writers.las Debug)" in r.log + + @pytest.mark.skipif( + not hasattr(pdal.Filter, "python"), + reason="filters.python PDAL plugin is not available", + ) + def test_filters_python(self): + r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las")) + f = pdal.Filter.python(script=__file__, function="a_filter", module="anything") + count = (r | f).execute() + assert count == 1065 + + def test_only_readers(self): + """Does a pipeline that consists of only readers return the merged data""" + read = pdal.Reader("test/data/*.las") + r1 = read.pipeline() + count1 = r1.execute() + array1 = r1.arrays[0] + + r2 = read | read + count2 = r2.execute() + array2 = r2.arrays[0] + + assert count2 == 2 * count1 + np.testing.assert_array_equal(np.concatenate([array1, array1]), array2) + + def test_quickinfo(self): + r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las")) + p = 
r.pipeline() + info = p.quickinfo + assert 'readers.las' in info.keys() + assert info['readers.las']['num_points'] == 1065 + + def test_quickinfo_offsets_scales(self): + r = pdal.Reader(os.path.join(DATADIRECTORY,"simple.laz")) + p = r.pipeline() + info = p.quickinfo + assert 'readers.las' in info.keys() + assert 'offset_x' in info['readers.las']['metadata'].keys() + assert 'scale_x' in info['readers.las']['metadata'].keys() + assert info['readers.las']['num_points'] == 1065 + + def test_jsonkwarg(self): + pipeline = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las")).pipeline().toJSON() + r = pdal.Pipeline(json=pipeline) + p = r.pipeline + assert 'readers.las' in p + + + +class TestArrayLoad: def test_merged_arrays(self): """Can we load data from a list of arrays to PDAL""" - if Version(pdal.info.version) < Version('1.8'): - return True - data = np.load(os.path.join(DATADIRECTORY, 'test3d.npy')) - + data = np.load(os.path.join(DATADIRECTORY, "test3d.npy")) arrays = [data, data, data] - - json = self.fetch_json('chip.json') - chip =u"""{ - "pipeline":[ - { - "type":"filters.range", - "limits":"Intensity[100:300)" - } - ] -}""" - - p = pdal.Pipeline(chip, arrays) - p.loglevel = 8 - count = p.execute() + filter_intensity = """{ + "pipeline":[ + { + "type":"filters.range", + "limits":"Intensity[100:300)" + } + ] + }""" + p = pdal.Pipeline(filter_intensity, arrays) + p.execute() arrays = p.arrays - self.assertEqual(len(arrays), 3) + assert len(arrays) == 3 for data in arrays: - self.assertEqual(len(data), 12) - self.assertEqual(data['Intensity'].sum(), 1926) + assert len(data) == 12 + assert data["Intensity"].sum() == 1926 def test_read_arrays(self): """Can we read and filter data from a list of arrays to PDAL""" - if Version(pdal.info.version) < Version('1.8'): - return True - # just some dummy data x_vals = [1.0, 2.0, 3.0, 4.0, 5.0] y_vals = [6.0, 7.0, 8.0, 9.0, 10.0] z_vals = [1.5, 3.5, 5.5, 7.5, 9.5] test_data = np.array( [(x, y, z) for x, y, z in zip(x_vals, 
y_vals, z_vals)], - dtype=[('X', np.float), ('Y', np.float), ('Z', np.float)] + dtype=[("X", float), ("Y", float), ("Z", float)], ) pipeline = """ @@ -183,25 +463,432 @@ def test_read_arrays(self): ] } """ + p = pdal.Pipeline(pipeline, arrays=[test_data]) + count = p.execute() + arrays = p.arrays + assert count == 2 + assert len(arrays) == 1 + + def test_reference_counting(self): + """Can we read and filter data from a list of arrays to PDAL""" + # just some dummy data + x_vals = [1.0, 2.0, 3.0, 4.0, 5.0] + y_vals = [6.0, 7.0, 8.0, 9.0, 10.0] + z_vals = [1.5, 3.5, 5.5, 7.5, 9.5] + test_data = np.array( + [(x, y, z) for x, y, z in zip(x_vals, y_vals, z_vals)], + dtype=[("X", float), ("Y", float), ("Z", float)], + ) - p = pdal.Pipeline(pipeline, arrays=[test_data,]) - p.loglevel = 8 + pipeline = """ + { + "pipeline": [ + { + "type":"filters.range", + "limits":"X[2.5:4.5]" + } + ] + } + """ + p = pdal.Pipeline(pipeline, arrays=[test_data]) count = p.execute() + assert count == 2 + refcount = sys.getrefcount(p.arrays[0]) + assert refcount == 1 + + + +class TestMesh: + @pytest.mark.parametrize("filename", ["sort.json", "sort.py"]) + def test_no_execute(self, filename): + """Does fetching meshes without executing throw an exception""" + r = get_pipeline(filename) + with pytest.raises(RuntimeError) as info: + r.meshes + assert "Pipeline has not been executed" in str(info.value) + + @pytest.mark.parametrize("filename", ["mesh.json", "mesh.py"]) + def test_mesh(self, filename): + """Can we fetch PDAL face data as a numpy array""" + r = get_pipeline(filename) + r.execute() + meshes = r.meshes + assert len(meshes) == 24 + + m = meshes[0] + assert str(m.dtype) == "[('A', ' 1 else chunks[0] + for chunks in in_arrays_chunks + ] + + def get_stream_handler(in_array, in_array_chunks): + in_array_chunks_it = iter(in_array_chunks) + def load_next_chunk(): + try: + next_chunk = next(in_array_chunks_it) + except StopIteration: + return 0 + + chunk_size = next_chunk.size + 
in_array[:chunk_size]["X"] = next_chunk[:]["X"] + in_array[:chunk_size]["Y"] = next_chunk[:]["Y"] + in_array[:chunk_size]["Z"] = next_chunk[:]["Z"] + + return chunk_size + + return load_next_chunk + + stream_handlers = [ + get_stream_handler(arr, chunks) if len(chunks) > 1 else None + for arr, chunks in zip(in_arrays, in_arrays_chunks) + ] + + expected_count = sum([sum([len(c) for c in chunks]) for chunks in in_arrays_chunks]) + + pipeline = """ + { + "pipeline": [{ + "type": "filters.stats" + }] + } + """ + if use_setter: + p = pdal.Pipeline(pipeline) + p.inputs = [(a, h) for a, h in zip(in_arrays, stream_handlers)] + else: + p = pdal.Pipeline(pipeline, arrays=in_arrays, stream_handlers=stream_handlers) + + count = p.execute() + out_arrays = p.arrays + assert count == expected_count + assert len(out_arrays) == len(in_arrays) + + for in_array_chunks, out_array in zip(in_arrays_chunks, out_arrays): + np.testing.assert_array_equal(out_array, np.concatenate(in_array_chunks)) + + @pytest.mark.parametrize("in_arrays, use_setter", [ + (arrays, use_setter) for arrays, use_setter in product([ + [c[0] for c in ONE_ARRAY_FULL], + [c[0] for c in MULTI_ARRAYS_FULL] + ], ['False', 'True']) + ]) + def test_pipeline_run_backward_compat(self, in_arrays, use_setter: bool): + expected_count = sum([len(a) for a in in_arrays]) + + pipeline = """ + { + "pipeline": [{ + "type": "filters.stats" + }] + } + """ + if use_setter: + p = pdal.Pipeline(pipeline) + p.inputs = in_arrays + else: + p = pdal.Pipeline(pipeline, arrays=in_arrays) + + count = p.execute() + out_arrays = p.arrays + assert count == expected_count + assert len(out_arrays) == len(in_arrays) + + for in_array, out_array in zip(in_arrays, out_arrays): + np.testing.assert_array_equal(out_array, in_array) + + @pytest.mark.parametrize("in_array, invalid_chunk_size", [ + (in_array, invalid_chunk_size) for in_array, invalid_chunk_size in product( + [gen_chunk(1234)], + [-1, 12345]) + ]) + def 
test_pipeline_fail_with_invalid_chunk_size(self, in_array, invalid_chunk_size): + """ + Ensure execution fails when using an invalid stream handler: + - One that returns a negative chunk size + - One that returns a chunk size bigger than the buffer capacity + """ + was_called = False + def invalid_stream_handler(): + nonlocal was_called + if was_called: + # avoid infinite loop + raise ValueError("Invalid handler should not have been called a second time") + was_called = True + return invalid_chunk_size -def test_suite(): - return unittest.TestSuite( - [TestPipeline]) + p = pdal.Pipeline(arrays=[in_array], stream_handlers=[invalid_stream_handler]) + with pytest.raises(RuntimeError, + match=f"Stream chunk size not in the range of array length: {invalid_chunk_size}"): + p.execute() -if __name__ == '__main__': - unittest.main()