diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..7435e016 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +* text eol=lf +shapefiles/**/* binary diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..ad565874 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,13 @@ +# These are supported funding model platforms + +github: [geospatialpython] +patreon: # Replace with a single Patreon username +open_collective: # Replace with a single Open Collective username +ko_fi: # Replace with a single Ko-fi username +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +otechie: # Replace with a single Otechie username +lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry +custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml index abd53835..aa7e47bc 100644 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -7,7 +7,7 @@ body: id: pyshp-version attributes: label: PyShp Version - description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. + description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. placeholder: ... validations: required: true @@ -15,7 +15,7 @@ body: id: python-version attributes: label: Python Version - description: Please input the version of the Python executable. + description: Please input the version of the Python executable. placeholder: ... 
validations: required: true @@ -23,7 +23,7 @@ body: id: your-code attributes: label: Your code - description: Please copy-paste the relevant parts of your code or script that triggered the error. + description: Please copy-paste the relevant parts of your code or script that triggered the error. placeholder: ... render: shell validations: @@ -41,7 +41,7 @@ body: id: notes attributes: label: Other notes - description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? + description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? placeholder: ... validations: required: false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/newfeature.yml b/.github/ISSUE_TEMPLATE/newfeature.yml index f35326d7..afb043aa 100644 --- a/.github/ISSUE_TEMPLATE/newfeature.yml +++ b/.github/ISSUE_TEMPLATE/newfeature.yml @@ -15,7 +15,7 @@ body: id: contribute attributes: label: Contributions - description: Would you be interested to contribute code that adds this functionality through a Pull Request? We gladly accept PRs - it's much faster and you'll be added a contributor. + description: Would you be interested to contribute code that adds this functionality through a Pull Request? We gladly accept PRs - it's much faster and you'll be added as a contributor. options: - label: I am interested in implementing the described feature request and submit as a PR. required: false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml index 76dfb689..d8c0cd09 100644 --- a/.github/ISSUE_TEMPLATE/question.yml +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -6,7 +6,7 @@ body: - type: textarea id: question attributes: - label: What's your question? + label: What's your question? description: Please describe what you would like to know about PyShp, e.g. how to do something. placeholder: ... 
validations: diff --git a/.github/ISSUE_TEMPLATE/unexpected.yml b/.github/ISSUE_TEMPLATE/unexpected.yml index 07ed85c4..bf0a5778 100644 --- a/.github/ISSUE_TEMPLATE/unexpected.yml +++ b/.github/ISSUE_TEMPLATE/unexpected.yml @@ -7,7 +7,7 @@ body: id: pyshp-version attributes: label: PyShp Version - description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. + description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. placeholder: ... validations: required: true @@ -15,7 +15,7 @@ body: id: python-version attributes: label: Python Version - description: Please input the version of the Python executable. + description: Please input the version of the Python executable. placeholder: ... validations: required: true @@ -23,7 +23,7 @@ body: id: your-code attributes: label: Your code - description: Please copy-paste the relevant parts of your code or script that you tried to run. + description: Please copy-paste the relevant parts of your code or script that you tried to run. placeholder: ... render: shell validations: @@ -48,7 +48,7 @@ body: id: notes attributes: label: Other notes - description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? + description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? placeholder: ... validations: required: false \ No newline at end of file diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml new file mode 100644 index 00000000..c6ca65a4 --- /dev/null +++ b/.github/actions/test/action.yml @@ -0,0 +1,119 @@ +name: + Run Doctests and Pytest + +description: + Run pytest, and run the doctest runner (shapefile.py as a script). 
+ +inputs: + extra_args: + description: Extra command line args for Pytest and python shapefile.py + default: '-m "not network"' + required: false + replace_remote_urls_with_localhost: + description: yes or no. Test loading shapefiles from a url, without overloading an external server from 30 parallel workflows. + default: 'no' + required: false + pyshp_repo_directory: + description: Path to where the PyShp repo was checked out to (to keep separate from Shapefiles & artefacts repo). + required: false + default: '.' + python-version: + description: Set to "2.7" to use caddy instead of python -m SimpleHTTPServer + required: true + + + +runs: + using: "composite" + steps: + # The PyShp repo is required to already be checked out into pyshp_repo_directory, + # e.g. by the calling workflow using: + # steps: + # - uses: actions/checkout@v4 + # with: + # path: ./Pyshp + # and then calling this Action with: + # - name: Run tests + # uses: ./Pyshp/.github/actions/test + # with: + # extra_args: "" + # replace_remote_urls_with_localhost: 'yes' + # pyshp_repo_directory: ./Pyshp + + # The Python to be tested with is required to already be setup, with "python" and "pip" on the system Path + + - name: Checkout shapefiles and zip file artefacts repo + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }} + uses: actions/checkout@v4 + with: + repository: JamesParrott/PyShp_test_shapefile + path: ./PyShp_test_shapefile + + - name: Serve shapefiles and zip file artefacts on localhost + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version != '2.7'}} + shell: bash + working-directory: ./PyShp_test_shapefile + run: | + python -m http.server 8000 & + echo "HTTP_SERVER_PID=$!" >> $GITHUB_ENV + sleep 4 # give server time to start + + - name: Download and unzip Caddy binary + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version == '2.7'}} + working-directory: . 
+ shell: bash + run: | + curl -L https://github.com/caddyserver/caddy/releases/download/v2.10.0/caddy_2.10.0_linux_amd64.tar.gz --output caddy.tar.gz + tar -xzf caddy.tar.gz + + - name: Serve shapefiles and zip file artefacts on localhost using Caddy + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version == '2.7'}} + shell: bash + working-directory: . + run: | + ./caddy file-server --root ./PyShp_test_shapefile --listen :8000 & + echo "HTTP_SERVER_PID=$!" >> $GITHUB_ENV + sleep 2 # give server time to start + + - name: Doctests + shell: bash + working-directory: ${{ inputs.pyshp_repo_directory }} + env: + REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} + run: python shapefile.py ${{ inputs.extra_args }} + + - name: Install test dependencies. + shell: bash + working-directory: ${{ inputs.pyshp_repo_directory }} + run: | + python -m pip install --upgrade pip + pip install -r requirements.test.txt + + - name: Pytest + shell: bash + working-directory: ${{ inputs.pyshp_repo_directory }} + env: + REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} + run: | + pytest -rA --tb=short ${{ inputs.extra_args }} + + - name: Show versions for logs. + shell: bash + run: | + python --version + python -m pytest --version + + + # - name: Test http server + # # (needs a full Github Actions runner or a Python non-slim Docker image, + # # as the slim Debian images don't have curl or wget). 
+ # if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }} + # shell: bash + # run: curl http://localhost:8000/ne_110m_admin_0_tiny_countries.shp + + - name: Stop http server + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }} + shell: bash + run: | + echo Killing http server process ID: ${{ env.HTTP_SERVER_PID }} + kill ${{ env.HTTP_SERVER_PID }} \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 26e1159c..00000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,37 +0,0 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions - -name: build - -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["2.7", "3.5", "3.6", "3.7", "3.8", "3.9"] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install pytest - if [ -f requirements.test.txt ]; then pip install -r requirements.test.txt; fi - - name: Test with doctest - run: | - python shapefile.py - - name: Test with pytest - run: | - pytest diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9299c686..c66adc89 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,23 +13,36 @@ on: types: [published] jobs: - deploy: + test: + # In general, tests should be run after building a distribution, to test that distribution. 
+ # However as long as PyShp is a pure Python library, with pure Python deps (or no deps) + # then this would only test the packaging process, not so much the code as there are + # no binaries. + uses: ./.github/workflows/run_tests_hooks_and_tools.yml + deploy: + # Prevent deployment of releases that fail any hooks (e.g. linting) or that fail any tests. + needs: test runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.x' + + + - name: Install dependencies run: | python -m pip install --upgrade pip pip install build - name: Build package run: python -m build + - name: Publish package + if: github.repository == 'GeospatialPython/pyshp' uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 with: user: __token__ diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml new file mode 100644 index 00000000..468b2e2b --- /dev/null +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -0,0 +1,109 @@ +# This workflow will run the pre-commit hooks (including linters), and the tests with a variety of Python versions + +name: Run pre-commit hooks and tests + +on: + push: + pull_request: + branches: [ master, ] + workflow_call: + workflow_dispatch: + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - uses: pre-commit/action@v3.0.1 + + pylint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: install Pylint and plugin + run: | + python -m pip install --upgrade pip + pip install pytest pylint pylint-per-file-ignores + - name: run Pylint for errors and warnings only, on test_shapefile.py + run: | + pylint --disable=R,C test_shapefile.py + + test_on_EOL_Pythons: + strategy: + fail-fast: false + matrix: + python-version: [ + "2.7", + 
"3.5", + "3.6", + "3.7", + "3.8", + ] + + runs-on: ubuntu-latest + container: + image: python:${{ matrix.python-version }} + + steps: + - uses: actions/checkout@v4 + with: + path: ./Pyshp + + - name: Non-network tests + uses: ./Pyshp/.github/actions/test + with: + pyshp_repo_directory: ./Pyshp + python-version: ${{ matrix.python-version }} + + - name: Network tests + uses: ./Pyshp/.github/actions/test + with: + extra_args: '-m network' + replace_remote_urls_with_localhost: 'yes' + pyshp_repo_directory: ./Pyshp + python-version: ${{ matrix.python-version }} + + test_on_supported_Pythons: + strategy: + fail-fast: false + matrix: + python-version: [ + "3.9", + "3.10", + "3.11", + "3.12", + "3.13", + "3.14.0-beta.4", + ] + os: [ + "macos-latest", + "ubuntu-24.04", + "windows-latest", + ] + + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions/checkout@v4 + with: + path: ./Pyshp + + - name: Non-network tests + uses: ./Pyshp/.github/actions/test + with: + pyshp_repo_directory: ./Pyshp + python-version: ${{ matrix.python-version }} + + - name: Network tests + uses: ./Pyshp/.github/actions/test + with: + extra_args: '-m network' + replace_remote_urls_with_localhost: 'yes' + pyshp_repo_directory: ./Pyshp + python-version: ${{ matrix.python-version }} diff --git a/.gitignore b/.gitignore index a82da866..d1734202 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,8 @@ build/ dist/ *.egg-info/ *.py[cod] +.vscode +.dmypy.json +.python-version +.venv +venv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..ffe59bf6 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,15 @@ +repos: +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.4 + hooks: + - id: ruff-format +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) +- repo: https://github.com/pre-commit/pre-commit-hooks + 
rev: v2.3.0 + hooks: + - id: check-yaml + - id: trailing-whitespace diff --git a/LICENSE.TXT b/LICENSE.TXT index b7d72761..d2b74462 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -1,9 +1,9 @@ -The MIT License (MIT) - -Copyright © 2013 Joel Lawhead - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +The MIT License (MIT) + +Copyright © 2013 Joel Lawhead + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md index 554753b2..c55e2043 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ The Python Shapefile Library (PyShp) reads and writes ESRI Shapefiles in pure Py - **Author**: [Joel Lawhead](https://github.com/GeospatialPython) - **Maintainers**: [Karim Bahgat](https://github.com/karimbahgat) -- **Version**: 2.3.0 -- **Date**: 30 April, 2022 +- **Version**: 2.3.1 +- **Date**: 28 July, 2022 - **License**: [MIT](https://github.com/GeospatialPython/pyshp/blob/master/LICENSE.TXT) ## Contents @@ -76,14 +76,14 @@ despite the numerous ways to store and exchange GIS data available today. Pyshp is compatible with Python 2.7-3.x. -This document provides examples for using PyShp to read and write shapefiles. However +This document provides examples for using PyShp to read and write shapefiles. However many more examples are continually added to the blog [http://GeospatialPython.com](http://GeospatialPython.com), -and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). +and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). Currently the sample census blockgroup shapefile referenced in the examples is available on the GitHub project site at [https://github.com/GeospatialPython/pyshp](https://github.com/GeospatialPython/pyshp). These examples are straight-forward and you can also easily run them against your -own shapefiles with minimal modification. +own shapefiles with minimal modification. 
Important: If you are new to GIS you should read about map projections. Please visit: [https://github.com/GeospatialPython/pyshp/wiki/Map-Projections](https://github.com/GeospatialPython/pyshp/wiki/Map-Projections) @@ -95,6 +95,28 @@ part of your geospatial project. # Version Changes +## 2.4.0 + +### Breaking Change. Support for Python 2 and Pythons <= 3.8 to be dropped. +- PyShp 2.4.0 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8. +These CPython versions have reached [end of life](https://devguide.python.org/versions/#versions). +- Future development will focus on PyShp v3.0.0 onwards (currently intended to support Pythons >= 3.9). +- This will not break any projects, as pip and other package managers should not install PyShp 3.0.0 +(after its release) in unsupported Pythons. But we no longer promise such projects will get PyShp's latest +bug fixes and features. +- If this negatively impacts your project, all feedback about this decision is welcome +on [the discussion page](https://github.com/GeospatialPython/pyshp/discussions/290). + + +### New Features: +- Reader.iterRecords now allows start and stop to be specified, to look up smaller ranges of records. +- Equality comparisons between Records now also require the fields to be the same (and in the same order). + +### Development: +- Code quality tools (Ruff format) run on PyShp +- Network, non-network, or all doctests selectable via command line args +- Network tests made runnable on localhost. + ## 2.3.1 ### Bug fixes: @@ -105,7 +127,7 @@ part of your geospatial project. ### New Features: -- Added support for pathlib and path-like shapefile filepaths (@mwtoews). +- Added support for pathlib and path-like shapefile filepaths (@mwtoews). - Allow reading individual file extensions via filepaths. ### Improvements: @@ -119,7 +141,7 @@ part of your geospatial project. 
- More robust handling of corrupt shapefiles (fixes #235) - Fix errors when writing to individual file-handles (fixes #237) - Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) -- Fix test issues in environments without network access (@sebastic, @musicinmybrain). +- Fix test issues in environments without network access (@sebastic, @musicinmybrain). ## 2.2.0 @@ -132,7 +154,7 @@ part of your geospatial project. ### Improvements: -- More examples and restructuring of README. +- More examples and restructuring of README. - More informative Shape to geojson warnings (see #219). - Add shapefile.VERBOSE flag to control warnings verbosity (default True). - Shape object information when calling repr(). @@ -189,7 +211,7 @@ part of your geospatial project. ### New Features: -- Added back read/write support for unicode field names. +- Added back read/write support for unicode field names. - Improved Record representation - More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() @@ -201,38 +223,38 @@ part of your geospatial project. ## 2.0.0 -The newest version of PyShp, version 2.0 introduced some major new improvements. +The newest version of PyShp, version 2.0 introduced some major new improvements. A great thanks to all who have contributed code and raised issues, and for everyone's -patience and understanding during the transition period. -Some of the new changes are incompatible with previous versions. +patience and understanding during the transition period. +Some of the new changes are incompatible with previous versions. Users of the previous version 1.x should therefore take note of the following changes -(Note: Some contributor attributions may be missing): +(Note: Some contributor attributions may be missing): ### Major Changes: -- Full support for unicode text, with custom encoding, and exception handling. 
- - Means that the Reader returns unicode, and the Writer accepts unicode. -- PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. +- Full support for unicode text, with custom encoding, and exception handling. + - Means that the Reader returns unicode, and the Writer accepts unicode. +- PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. - Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. + - Specify filepath/destination and text encoding when creating the Writer. + - The file is written incrementally with each call to shape/record. + - Adding shapes is now done using dedicated methods for each shapetype. - Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. - - Shapefiles can be iterated, have a length, and supports the geo interface. + - Shapefiles can be opened using the context manager, and files are properly closed. + - Shapefiles can be iterated, have a length, and supports the geo interface. - New ways of inspecting shapefile metadata by printing. [@megies] - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] -- Add more support and documentation for MultiPatch 3D shapes. -- The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. -- Better documentation of previously unclear aspects, such as field types. + - More convenient shape type name checking. [@megies] +- Add more support and documentation for MultiPatch 3D shapes. 
+- The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. +- Better documentation of previously unclear aspects, such as field types. ### Important Fixes: - More reliable/robust: - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. [@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. + - Improved parsing of field value types, fixed errors and made more flexible. - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] - Fix some geo interface errors, including checking polygon directions. - Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] @@ -275,7 +297,7 @@ OR >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") OR any of the other 5+ formats which are potentially part of a shapefile. The -library does not care about file extensions. You can also specify that you only +library does not care about file extensions. 
You can also specify that you only want to read some of the file extensions through the use of keyword arguments: @@ -283,7 +305,7 @@ want to read some of the file extensions through the use of keyword arguments: #### Reading Shapefiles from Zip Files -If your shapefile is wrapped inside a zip file, the library is able to handle that too, meaning you don't have to worry about unzipping the contents: +If your shapefile is wrapped inside a zip file, the library is able to handle that too, meaning you don't have to worry about unzipping the contents: >>> sf = shapefile.Reader("shapefiles/blockgroups.zip") @@ -295,11 +317,11 @@ If the zip file contains multiple shapefiles, just specify which shapefile to re #### Reading Shapefiles from URLs -Finally, you can use all of the above methods to read shapefiles directly from the internet, by giving a url instead of a local path, e.g.: +Finally, you can use all of the above methods to read shapefiles directly from the internet, by giving a url instead of a local path, e.g.: >>> # from a zipped shapefile on website - >>> sf = shapefile.Reader("https://biogeo.ucdavis.edu/data/diva/rrd/NIC_rrd.zip") + >>> sf = shapefile.Reader("https://github.com/JamesParrott/PyShp_test_shapefile/raw/main/gis_osm_natural_a_free_1.zip") >>> # from a shapefile collection of files in a github repository >>> sf = shapefile.Reader("https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true") @@ -337,8 +359,8 @@ objects are properly closed when done reading the data: #### Reading Shapefile Meta-Data Shapefiles have a number of attributes for inspecting the file contents. -A shapefile is a container for a specific type of geometry, and this can be checked using the -shapeType attribute. +A shapefile is a container for a specific type of geometry, and this can be checked using the +shapeType attribute. 
>>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") @@ -364,7 +386,7 @@ the existing shape types are not sequential: - POLYGONM = 25 - MULTIPOINTM = 28 - MULTIPATCH = 31 - + Based on this we can see that our blockgroups shapefile contains Polygon type shapes. The shape types are also defined as constants in the shapefile module, so that we can compare types more intuitively: @@ -378,8 +400,8 @@ For convenience, you can also get the name of the shape type as a string: >>> sf.shapeTypeName == 'POLYGON' True - -Other pieces of meta-data that we can check include the number of features + +Other pieces of meta-data that we can check include the number of features and the bounding box area the shapefile covers: @@ -387,10 +409,10 @@ and the bounding box area the shapefile covers: 663 >>> sf.bbox [-122.515048, 37.652916, -122.327622, 37.863433] - + Finally, if you would prefer to work with the entire shapefile in a different format, you can convert all of it to a GeoJSON dictionary, although you may lose -some information in the process, such as z- and m-values: +some information in the process, such as z- and m-values: >>> sf.__geo_interface__['type'] @@ -415,7 +437,7 @@ each shape record. >>> len(shapes) 663 - + To read a single shape by calling its index use the shape() method. The index is the shape's count from 0. So to read the 8th shape record you would use its index which is 7. @@ -457,12 +479,12 @@ shapeType Point do not have a bounding box 'bbox'. >>> shapes[3].shapeType 5 - * `shapeTypeName`: a string representation of the type of shape as defined by shapeType. Read-only. + * `shapeTypeName`: a string representation of the type of shape as defined by shapeType. Read-only. >>> shapes[3].shapeTypeName 'POLYGON' - + * `bbox`: If the shape type contains multiple points this tuple describes the lower left (x,y) coordinate and upper right corner coordinate creating a complete box around the points. 
If the shapeType is a @@ -496,7 +518,7 @@ shapeType Point do not have a bounding box 'bbox'. >>> ['%.3f' % coord for coord in shape] ['-122.471', '37.787'] -In most cases, however, if you need to do more than just type or bounds checking, you may want +In most cases, however, if you need to do more than just type or bounds checking, you may want to convert the geometry to the more human-readable [GeoJSON format](http://geojson.org), where lines and polygons are grouped for you: @@ -505,7 +527,7 @@ where lines and polygons are grouped for you: >>> geoj = s.__geo_interface__ >>> geoj["type"] 'MultiPolygon' - + The results from the shapes() method similarly supports converting to GeoJSON: @@ -514,12 +536,12 @@ The results from the shapes() method similarly supports converting to GeoJSON: Note: In some cases, if the conversion from shapefile geometry to GeoJSON encountered any problems or potential issues, a warning message will be displayed with information about the affected -geometry. To ignore or suppress these warnings, you can disable this behavior by setting the -module constant VERBOSE to False: +geometry. To ignore or suppress these warnings, you can disable this behavior by setting the +module constant VERBOSE to False: >>> shapefile.VERBOSE = False - + ### Reading Records @@ -534,12 +556,12 @@ You can call the "fields" attribute of the shapefile as a Python list. Each field is a Python list with the following information: * Field name: the name describing the data at this column index. - * Field type: the type of data at this column index. Types can be: + * Field type: the type of data at this column index. Types can be: * "C": Characters, text. * "N": Numbers, with or without decimals. * "F": Floats (same as "N"). - * "L": Logical, for boolean True/False values. - * "D": Dates. + * "L": Logical, for boolean True/False values. + * "D": Dates. * "M": Memo, has no meaning within a GIS and is part of the xbase spec instead. 
* Field length: the length of the data found at this column index. Older GIS software may truncate this length to 8 or 11 characters for "Character" @@ -571,11 +593,11 @@ attribute: ... ["UNITS3_9", "N", 8, 0], ["UNITS10_49", "N", 8, 0], ... ["UNITS50_UP", "N", 8, 0], ["MOBILEHOME", "N", 7, 0]] -The first field of a dbf file is always a 1-byte field called "DeletionFlag", -which indicates records that have been deleted but not removed. However, -since this flag is very rarely used, PyShp currently will return all records -regardless of their deletion flag, and the flag is also not included in the list of -record values. In other words, the DeletionFlag field has no real purpose, and +The first field of a dbf file is always a 1-byte field called "DeletionFlag", +which indicates records that have been deleted but not removed. However, +since this flag is very rarely used, PyShp currently will return all records +regardless of their deletion flag, and the flag is also not included in the list of +record values. In other words, the DeletionFlag field has no real purpose, and should in most cases be ignored. For instance, to get a list of all fieldnames: @@ -593,10 +615,10 @@ To read a single record call the record() method with the record's index: >>> rec = sf.record(3) - + Each record is a list-like Record object containing the values corresponding to each field in the field list (except the DeletionFlag). A record's values can be accessed by positional indexing or slicing. -For example in the blockgroups shapefile the 2nd and 3rd fields are the blockgroup id +For example in the blockgroups shapefile the 2nd and 3rd fields are the blockgroup id and the 1990 population count of that San Francisco blockgroup: @@ -604,7 +626,7 @@ and the 1990 population count of that San Francisco blockgroup: ['060750601001', 4715] For simpler access, the fields of a record can also accessed via the name of the field, -either as a key or as an attribute name. 
The blockgroup id (BKG_KEY) of the blockgroups shapefile +either as a key or as an attribute name. The blockgroup id (BKG_KEY) of the blockgroups shapefile can also be retrieved as: @@ -613,7 +635,7 @@ can also be retrieved as: >>> rec.BKG_KEY '060750601001' - + The record values can be easily integrated with other programs by converting it to a field-value dictionary: @@ -621,13 +643,13 @@ The record values can be easily integrated with other programs by converting it >>> sorted(dct.items()) [('AGE_18_29', 1467), ('AGE_30_49', 1681), ('AGE_50_64', 92), ('AGE_5_17', 848), ('AGE_65_UP', 30), ('AGE_UNDER5', 597), ('AMERI_ES', 6), ('AREA', 2.34385), ('ASIAN_PI', 452), ('BKG_KEY', '060750601001'), ('BLACK', 1007), ('DIVORCED', 149), ('FEMALES', 2095), ('FHH_CHILD', 16), ('HISPANIC', 416), ('HOUSEHOLDS', 1195), ('HSEHLD_1_F', 40), ('HSEHLD_1_M', 22), ('HSE_UNITS', 1258), ('MALES', 2620), ('MARHH_CHD', 79), ('MARHH_NO_C', 958), ('MARRIED', 2021), ('MEDIANRENT', 739), ('MEDIAN_VAL', 337500), ('MHH_CHILD', 0), ('MOBILEHOME', 0), ('NEVERMARRY', 703), ('OTHER', 288), ('OWNER_OCC', 66), ('POP1990', 4715), ('POP90_SQMI', 2011.6), ('RENTER_OCC', 3733), ('SEPARATED', 49), ('UNITS10_49', 49), ('UNITS2', 160), ('UNITS3_9', 672), ('UNITS50_UP', 0), ('UNITS_1ATT', 302), ('UNITS_1DET', 43), ('VACANT', 93), ('WHITE', 2962), ('WIDOWED', 37)] -If at a later point you need to check the record's index position in the original +If at a later point you need to check the record's index position in the original shapefile, you can do this through the "oid" attribute: >>> rec.oid 3 - + ### Reading Geometry and Records Simultaneously You may want to examine both the geometry and the attributes for a record at @@ -663,13 +685,13 @@ To get the 4th shape record from the blockgroups shapefile use the third index: >>> shapeRec = sf.shapeRecord(3) >>> shapeRec.record[1:3] ['060750601001', 4715] - + Each individual shape record also supports the _\_geo_interface\_\_ to convert it to a GeoJSON feature: 
>>> shapeRec.__geo_interface__['type'] 'Feature' - + ## Writing Shapefiles @@ -697,7 +719,7 @@ the file path and name to save to: >>> w = shapefile.Writer('shapefiles/test/testfile') >>> w.field('field1', 'C') - + File extensions are optional when reading or writing shapefiles. If you specify them PyShp ignores them anyway. When you save files you can specify a base file name that is used for all three file types. Or you can specify a name for @@ -706,9 +728,9 @@ one or more file types: >>> w = shapefile.Writer(dbf='shapefiles/test/onlydbf.dbf') >>> w.field('field1', 'C') - + In that case, any file types not assigned will not -save and only file types with file names will be saved. +save and only file types with file names will be saved. #### Writing Shapefiles to File-Like Objects @@ -738,14 +760,14 @@ write to them: >>> r = shapefile.Reader(shp=shp, shx=shx, dbf=dbf) >>> len(r) 1 - - + + #### Writing Shapefiles Using the Context Manager The "Writer" class automatically closes the open files and writes the final headers once it is garbage collected. -In case of a crash and to make the code more readable, it is nevertheless recommended -you do this manually by calling the "close()" method: +In case of a crash and to make the code more readable, it is nevertheless recommended +you do this manually by calling the "close()" method: >>> w.close() @@ -757,15 +779,15 @@ objects are properly closed and final headers written once you exit the with-cla >>> with shapefile.Writer("shapefiles/test/contextwriter") as w: ... w.field('field1', 'C') ... pass - + #### Setting the Shape Type The shape type defines the type of geometry contained in the shapefile. All of the shapes must match the shape type setting. -There are three ways to set the shape type: - * Set it when creating the class instance. - * Set it by assigning a value to an existing class instance. +There are three ways to set the shape type: + * Set it when creating the class instance. 
+ * Set it by assigning a value to an existing class instance. * Set it automatically to the type of the first non-null shape by saving the shapefile. To manually set the shape type for a Writer object when creating the Writer: @@ -784,14 +806,14 @@ OR you can set it after the Writer is created: >>> w.shapeType 1 - + ### Adding Records -Before you can add records you must first create the fields that define what types of -values will go into each attribute. +Before you can add records you must first create the fields that define what types of +values will go into each attribute. -There are several different field types, all of which support storing None values as NULL. +There are several different field types, all of which support storing None values as NULL. Text fields are created using the 'C' type, and the third 'size' argument can be customized to the expected length of text values to save space: @@ -804,12 +826,12 @@ length of text values to save space: >>> w.null() >>> w.record('Hello', 'World', 'World'*50) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == ['Hello', 'World', 'World'*50] -Date fields are created using the 'D' type, and can be created using either -date objects, lists, or a YYYYMMDD formatted string. +Date fields are created using the 'D' type, and can be created using either +date objects, lists, or a YYYYMMDD formatted string. Field length or decimal have no impact on this type: @@ -825,18 +847,18 @@ Field length or decimal have no impact on this type: >>> w.record('19980130') >>> w.record(None) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == [date(1898,1,30)] >>> assert r.record(1) == [date(1998,1,30)] >>> assert r.record(2) == [date(1998,1,30)] >>> assert r.record(3) == [None] -Numeric fields are created using the 'N' type (or the 'F' type, which is exactly the same). 
-By default the fourth decimal argument is set to zero, essentially creating an integer field. -To store floats you must set the decimal argument to the precision of your choice. -To store very large numbers you must increase the field length size to the total number of digits -(including comma and minus). +Numeric fields are created using the 'N' type (or the 'F' type, which is exactly the same). +By default the fourth decimal argument is set to zero, essentially creating an integer field. +To store floats you must set the decimal argument to the precision of your choice. +To store very large numbers you must increase the field length size to the total number of digits +(including comma and minus). >>> w = shapefile.Writer('shapefiles/test/dtype') @@ -852,15 +874,15 @@ To store very large numbers you must increase the field length size to the total >>> w.record(INT=nr, LOWPREC=nr, MEDPREC=nr, HIGHPREC=-3.2302e-25, FTYPE=nr, LARGENR=int(nr)*10**100) >>> w.record(None, None, None, None, None, None) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == [1, 1.32, 1.3217328, -3.2302e-25, 1.3217328, 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000] >>> assert r.record(1) == [None, None, None, None, None, None] - -Finally, we can create boolean fields by setting the type to 'L'. -This field can take True or False values, or 1 (True) or 0 (False). -None is interpreted as missing. + +Finally, we can create boolean fields by setting the type to 'L'. +This field can take True or False values, or 1 (True) or 0 (False). +None is interpreted as missing. >>> w = shapefile.Writer('shapefiles/test/dtype') @@ -876,9 +898,9 @@ None is interpreted as missing. 
>>> w.record(False) >>> w.record(0) >>> w.record(None) - >>> w.record("Nonesense") + >>> w.record("Nonsense") >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> r.record(0) Record #0: [True] @@ -892,7 +914,7 @@ None is interpreted as missing. Record #4: [None] >>> r.record(5) Record #5: [None] - + You can also add attributes using keyword arguments where the keys are field names. @@ -909,12 +931,12 @@ You can also add attributes using keyword arguments where the keys are field nam Geometry is added using one of several convenience methods. The "null" method is used for null shapes, "point" is used for point shapes, "multipoint" is used for multipoint shapes, "line" for lines, -"poly" for polygons. +"poly" for polygons. **Adding a Null shape** -A shapefile may contain some records for which geometry is not available, and may be set using the "null" method. -Because Null shape types (shape type 0) have no geometry the "null" method is called without any arguments. +A shapefile may contain some records for which geometry is not available, and may be set using the "null" method. +Because Null shape types (shape type 0) have no geometry the "null" method is called without any arguments. >>> w = shapefile.Writer('shapefiles/test/null') @@ -928,59 +950,59 @@ Because Null shape types (shape type 0) have no geometry the "null" method is ca **Adding a Point shape** Point shapes are added using the "point" method. A point is specified by an x and -y value. +y value. >>> w = shapefile.Writer('shapefiles/test/point') >>> w.field('name', 'C') - - >>> w.point(122, 37) + + >>> w.point(122, 37) >>> w.record('point1') - + >>> w.close() **Adding a MultiPoint shape** -If your point data allows for the possibility of multiple points per feature, use "multipoint" instead. -These are specified as a list of xy point coordinates. +If your point data allows for the possibility of multiple points per feature, use "multipoint" instead. 
+These are specified as a list of xy point coordinates. >>> w = shapefile.Writer('shapefiles/test/multipoint') >>> w.field('name', 'C') - - >>> w.multipoint([[122,37], [124,32]]) + + >>> w.multipoint([[122,37], [124,32]]) >>> w.record('multipoint1') - + >>> w.close() - + **Adding a LineString shape** -For LineString shapefiles, each shape is given as a list of one or more linear features. -Each of the linear features must have at least two points. - - +For LineString shapefiles, each shape is given as a list of one or more linear features. +Each of the linear features must have at least two points. + + >>> w = shapefile.Writer('shapefiles/test/line') >>> w.field('name', 'C') - + >>> w.line([ ... [[1,5],[5,5],[5,1],[3,3],[1,1]], # line 1 ... [[3,2],[2,6]] # line 2 ... ]) - + >>> w.record('linestring1') - + >>> w.close() - + **Adding a Polygon shape** Similarly to LineString, Polygon shapes consist of multiple polygons, and must be given as a list of polygons. -The main difference is that polygons must have at least 4 points and the last point must be the same as the first. +The main difference is that polygons must have at least 4 points and the last point must be the same as the first. It's also okay if you forget to repeat the first point at the end; PyShp automatically checks and closes the polygons if you don't. It's important to note that for Polygon shapefiles, your polygon coordinates must be ordered in a clockwise direction. If any of the polygons have holes, then the hole polygon coordinates must be ordered in a counterclockwise direction. -The direction of your polygons determines how shapefile readers will distinguish between polygon outlines and holes. +The direction of your polygons determines how shapefile readers will distinguish between polygon outlines and holes. >>> w = shapefile.Writer('shapefiles/test/polygon') @@ -992,13 +1014,13 @@ The direction of your polygons determines how shapefile readers will distinguish ... 
[[15,2], [17,6], [22,7]] # poly 2 ... ]) >>> w.record('polygon1') - + >>> w.close() - + **Adding from an existing Shape object** Finally, geometry can be added by passing an existing "Shape" object to the "shape" method. -You can also pass it any GeoJSON dictionary or _\_geo_interface\_\_ compatible object. +You can also pass it any GeoJSON dictionary or _\_geo_interface\_\_ compatible object. This can be particularly useful for copying from one file to another: @@ -1011,14 +1033,14 @@ This can be particularly useful for copying from one file to another: >>> for shaperec in r.iterShapeRecords(): ... w.record(*shaperec.record) ... w.shape(shaperec.shape) - + >>> # or GeoJSON dicts >>> for shaperec in r.iterShapeRecords(): ... w.record(*shaperec.record) ... w.shape(shaperec.shape.__geo_interface__) - - >>> w.close() - + + >>> w.close() + ### Geometry and Record Balancing @@ -1027,17 +1049,17 @@ number of records equals the number of shapes to create a valid shapefile. You must take care to add records and shapes in the same order so that the record data lines up with the geometry data. For example: - + >>> w = shapefile.Writer('shapefiles/test/balancing', shapeType=shapefile.POINT) >>> w.field("field1", "C") >>> w.field("field2", "C") - + >>> w.record("row", "one") >>> w.point(1, 1) - + >>> w.record("row", "two") >>> w.point(2, 2) - + To help prevent accidental misalignment PyShp has an "auto balance" feature to make sure when you add either a shape or a record the two sides of the equation line up. This way if you forget to update an entry the @@ -1050,7 +1072,7 @@ the attribute autoBalance to 1 or True: >>> w.record("row", "three") >>> w.record("row", "four") >>> w.point(4, 4) - + >>> w.recNum == w.shpNum True @@ -1059,7 +1081,7 @@ to ensure the other side is up to date. When balancing is used null shapes are created on the geometry side or records with a value of "NULL" for each field is created on the attribute side. 
This gives you flexibility in how you build the shapefile. -You can create all of the shapes and then create all of the records or vice versa. +You can create all of the shapes and then create all of the records or vice versa. >>> w.autoBalance = 0 @@ -1069,25 +1091,40 @@ You can create all of the shapes and then create all of the records or vice vers >>> w.point(5, 5) >>> w.point(6, 6) >>> w.balance() - + >>> w.recNum == w.shpNum True If you do not use the autoBalance() or balance() method and forget to manually balance the geometry and attributes the shapefile will be viewed as corrupt by most shapefile software. - +### Writing .prj files +A .prj file, or projection file, is a simple text file that stores a shapefile's map projection and coordinate reference system to help mapping software properly locate the geometry on a map. If you don't have one, you may get confusing errors when you try to use the shapefile you created. The GIS software may complain that it doesn't know the shapefile's projection and refuse to accept it, it may assume the shapefile is the same projection as the rest of your GIS project and put it in the wrong place, or it might assume the coordinates are an offset in meters from latitude and longitude 0,0 which will put your data in the middle of the ocean near Africa. The text in the .prj file is a [Well-Known-Text (WKT) projection string](https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems). Projection strings can be quite long so they are often referenced using numeric codes called EPSG codes. The .prj file must have the same base name as your shapefile. So for example if you have a shapefile named "myPoints.shp", the .prj file must be named "myPoints.prj".
+ +If you're using the same projection over and over, the following is a simple way to create the .prj file assuming your base filename is stored in a variable called "filename": + +``` + with open("{}.prj".format(filename), "w") as prj: + wkt = 'GEOGCS["WGS 84",' + wkt += 'DATUM["WGS_1984",' + wkt += 'SPHEROID["WGS 84",6378137,298.257223563]]' + wkt += ',PRIMEM["Greenwich",0],' + wkt += 'UNIT["degree",0.0174532925199433]]' + prj.write(wkt) +``` + +If you need to dynamically fetch WKT projection strings, you can use the pure Python [PyCRS](https://github.com/karimbahgat/PyCRS) module which has a number of useful features. # Advanced Use ## Common Errors and Fixes -Below we list some commonly encountered errors and ways to fix them. +Below we list some commonly encountered errors and ways to fix them. ### Warnings and Logging -By default, PyShp chooses to be transparent and provide the user with logging information and warnings about non-critical issues when reading or writing shapefiles. This behavior is controlled by the module constant `VERBOSE` (which defaults to True). If you would rather suppress this information, you can simply set this to False: +By default, PyShp chooses to be transparent and provide the user with logging information and warnings about non-critical issues when reading or writing shapefiles. This behavior is controlled by the module constant `VERBOSE` (which defaults to True). If you would rather suppress this information, you can simply set this to False: >>> shapefile.VERBOSE = False @@ -1100,21 +1137,21 @@ All logging happens under the namespace `shapefile`. So another way to suppress ### Shapefile Encoding Errors -PyShp supports reading and writing shapefiles in any language or character encoding, and provides several options for decoding and encoding text. -Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't have to specify the encoding. 
-If you encounter an encoding error when reading a shapefile, this means the shapefile was likely written in a non-utf8 encoding. +PyShp supports reading and writing shapefiles in any language or character encoding, and provides several options for decoding and encoding text. +Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't have to specify the encoding. +If you encounter an encoding error when reading a shapefile, this means the shapefile was likely written in a non-utf8 encoding. For instance, when working with English language shapefiles, a common reason for encoding errors is that the shapefile was written in Latin-1 encoding. -For reading shapefiles in any non-utf8 encoding, such as Latin-1, just -supply the encoding option when creating the Reader class. +For reading shapefiles in any non-utf8 encoding, such as Latin-1, just +supply the encoding option when creating the Reader class. >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="latin1") >>> r.record(0) == [2, u'Ñandú'] True - -Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such -as UTF-8. Assuming the new encoding supports the characters you are trying to write, reading it back in -should give you the same unicode string you started with. + +Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such +as UTF-8. Assuming the new encoding supports the characters you are trying to write, reading it back in +should give you the same unicode string you started with. >>> w = shapefile.Writer("shapefiles/test/latin_as_utf8.shp", encoding="utf8") @@ -1122,15 +1159,15 @@ should give you the same unicode string you started with.
>>> w.record(*r.record(0)) >>> w.null() >>> w.close() - + >>> r = shapefile.Reader("shapefiles/test/latin_as_utf8.shp", encoding="utf8") >>> r.record(0) == [2, u'Ñandú'] True - + If you supply the wrong encoding and the string is unable to be decoded, PyShp will by default raise an exception. If however, on rare occasion, you are unable to find the correct encoding and want to ignore or replace encoding errors, you can specify the "encodingErrors" to be used by the decode method. This -applies to both reading and writing. +applies to both reading and writing. >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="ascii", encodingErrors="replace") @@ -1141,8 +1178,8 @@ applies to both reading and writing. ## Reading Large Shapefiles -Despite being a lightweight library, PyShp is designed to be able to read shapefiles of any size, allowing you to work with hundreds of thousands or even millions -of records and complex geometries. +Despite being a lightweight library, PyShp is designed to be able to read shapefiles of any size, allowing you to work with hundreds of thousands or even millions +of records and complex geometries. ### Iterating through a shapefile @@ -1152,22 +1189,22 @@ As an example, let's load this Natural Earth shapefile of more than 4000 global >>> sf = shapefile.Reader("https://github.com/nvkelso/natural-earth-vector/blob/master/10m_cultural/ne_10m_admin_1_states_provinces?raw=true") When first creating the Reader class, the library only reads the header information -and leaves the rest of the file contents alone. Once you call the records() and shapes() -methods however, it will attempt to read the entire file into memory at once. +and leaves the rest of the file contents alone. Once you call the records() and shapes() +methods however, it will attempt to read the entire file into memory at once. For very large files this can result in MemoryError.
So when working with large files it is recommended to use the iterShapes(), iterRecords(), or iterShapeRecords() -methods instead. These iterate through the file contents one at a time, enabling you to loop -through them while keeping memory usage at a minimum. +methods instead. These iterate through the file contents one at a time, enabling you to loop +through them while keeping memory usage at a minimum. >>> for shape in sf.iterShapes(): ... # do something here ... pass - + >>> for rec in sf.iterRecords(): ... # do something here ... pass - + >>> for shapeRec in sf.iterShapeRecords(): ... # do something here ... pass @@ -1187,7 +1224,7 @@ By default when reading the attribute records of a shapefile, pyshp unpacks and ... pass >>> rec Record #4595: ['Birgu', 'Malta'] - + ### Attribute filtering In many cases, we aren't interested in all entries of a shapefile, but rather only want to retrieve a small subset of records by filtering on some attribute. To avoid wasting time reading records and shapes that we don't need, we can start by iterating only the records and fields of interest, check if the record matches some condition as a way to filter the data, and finally load the full record and shape geometry for those that meet the condition: @@ -1207,11 +1244,11 @@ In many cases, we aren't interested in all entries of a shapefile, but rather on 'Maekel' 'Anseba' -Selectively reading only the necessary data in this way is particularly useful for efficiently processing a limited subset of data from very large files or when looping through a large number of files, especially if they contain large attribute tables or complex shape geometries. +Selectively reading only the necessary data in this way is particularly useful for efficiently processing a limited subset of data from very large files or when looping through a large number of files, especially if they contain large attribute tables or complex shape geometries.
### Spatial filtering -Another common use-case is that we only want to read those records that are located in some region of interest. Because the shapefile stores the bounding box of each shape separately from the geometry data, it's possible to quickly retrieve all shapes that might overlap a given bounding box region without having to load the full shape geometry data for every shape. This can be done by specifying the `bbox` argument to any of the record or shape methods: +Another common use-case is that we only want to read those records that are located in some region of interest. Because the shapefile stores the bounding box of each shape separately from the geometry data, it's possible to quickly retrieve all shapes that might overlap a given bounding box region without having to load the full shape geometry data for every shape. This can be done by specifying the `bbox` argument to the shapes, iterShapes, or iterShapeRecords methods: >>> bbox = [36.423, 12.360, 43.123, 18.004] # ca bbox of Eritrea @@ -1238,23 +1275,23 @@ Another common use-case is that we only want to read those records that are loca Record #2037: ['Al Hudaydah', 'Yemen'] Record #3741: ['Anseba', 'Eritrea'] -This functionality means that shapefiles can be used as a bare-bones spatially indexed database, with very fast bounding box queries for even the largest of shapefiles. Note that, as with all spatial indexing, this method does not guarantee that the *geometries* of the resulting matches overlap the queried region, only that their *bounding boxes* overlap. +This functionality means that shapefiles can be used as a bare-bones spatially indexed database, with very fast bounding box queries for even the largest of shapefiles. Note that, as with all spatial indexing, this method does not guarantee that the *geometries* of the resulting matches overlap the queried region, only that their *bounding boxes* overlap. 
## Writing large shapefiles -Similar to the Reader class, the shapefile Writer class uses a streaming approach to keep memory -usage at a minimum and allow writing shapefiles of arbitrarily large sizes. The library takes care of this under-the-hood by immediately -writing each geometry and record to disk the moment they -are added using shape() or record(). Once the writer is closed, exited, or garbage -collected, the final header information is calculated and written to the beginning of -the file. +Similar to the Reader class, the shapefile Writer class uses a streaming approach to keep memory +usage at a minimum and allow writing shapefiles of arbitrarily large sizes. The library takes care of this under-the-hood by immediately +writing each geometry and record to disk the moment they +are added using shape() or record(). Once the writer is closed, exited, or garbage +collected, the final header information is calculated and written to the beginning of +the file. ### Merging multiple shapefiles -This means that it's possible to merge hundreds or thousands of shapefiles, as -long as you iterate through the source files to avoid loading everything into +This means that it's possible to merge hundreds or thousands of shapefiles, as +long as you iterate through the source files to avoid loading everything into memory. The following example copies the contents of a shapefile to a new file 10 times: >>> # create writer @@ -1280,12 +1317,12 @@ memory. The following example copies the contents of a shapefile to a new file 1 >>> # close the writer >>> w.close() -In this trivial example, we knew that all files had the exact same field names, ordering, and types. In other scenarios, you will have to additionally make sure that all shapefiles have the exact same fields in the same order, and that they all contain the same geometry type. +In this trivial example, we knew that all files had the exact same field names, ordering, and types. 
In other scenarios, you will have to additionally make sure that all shapefiles have the exact same fields in the same order, and that they all contain the same geometry type. ### Editing shapefiles -If you need to edit a shapefile you would have to read the -file one record at a time, modify or filter the contents, and write it back out. For instance, to create a copy of a shapefile that only keeps a subset of relevant fields: +If you need to edit a shapefile you would have to read the +file one record at a time, modify or filter the contents, and write it back out. For instance, to create a copy of a shapefile that only keeps a subset of relevant fields: >>> # create writer >>> w = shapefile.Writer('shapefiles/test/edit') @@ -1310,7 +1347,7 @@ file one record at a time, modify or filter the contents, and write it back out. ## 3D and Other Geometry Types Most shapefiles store conventional 2D points, lines, or polygons. But the shapefile format is also capable -of storing various other types of geometries as well, including complex 3D surfaces and objects. +of storing various other types of geometries as well, including complex 3D surfaces and objects. ### Shapefiles with measurement (M) values @@ -1323,110 +1360,148 @@ or by simply omitting the third M-coordinate. >>> w = shapefile.Writer('shapefiles/test/linem') >>> w.field('name', 'C') - + >>> w.linem([ ... [[1,5,0],[5,5],[5,1,3],[3,3,None],[1,1,0]], # line with one omitted and one missing M-value ... [[3,2],[2,6]] # line without any M-values ... 
]) - + >>> w.record('linem1') - + >>> w.close() - + Shapefiles containing M-values can be examined in several ways: >>> r = shapefile.Reader('shapefiles/test/linem') - + >>> r.mbox # the lower and upper bound of M-values in the shapefile [0.0, 3.0] - + >>> r.shape(0).m # flat list of M-values [0.0, None, 3.0, None, 0.0, None, None] - + ### Shapefiles with elevation (Z) values -Elevation shape types are shapes that include an elevation value at each vertex, for instance elevation from a GPS device. -Shapes with elevation (Z) values are added with the following methods: "pointz", "multipointz", "linez", and "polyz". +Elevation shape types are shapes that include an elevation value at each vertex, for instance elevation from a GPS device. +Shapes with elevation (Z) values are added with the following methods: "pointz", "multipointz", "linez", and "polyz". The Z-values are specified by adding a third Z value to each XY coordinate. Z-values do not support the concept of missing data, but if you omit the third Z-coordinate it will default to 0. Note that Z-type shapes also support measurement (M) values added -as a fourth M-coordinate. This too is optional. - - +as a fourth M-coordinate. This too is optional. + + >>> w = shapefile.Writer('shapefiles/test/linez') >>> w.field('name', 'C') - + >>> w.linez([ ... [[1,5,18],[5,5,20],[5,1,22],[3,3],[1,1]], # line with some omitted Z-values ... [[3,2],[2,6]], # line without any Z-values ... [[3,2,15,0],[2,6,13,3],[1,9,14,2]] # line with both Z- and M-values ... ]) - + >>> w.record('linez1') - + >>> w.close() - + To examine a Z-type shapefile you can do: >>> r = shapefile.Reader('shapefiles/test/linez') - + >>> r.zbox # the lower and upper bound of Z-values in the shapefile [0.0, 22.0] - + >>> r.shape(0).z # flat list of Z-values [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] ### 3D MultiPatch Shapefiles -Multipatch shapes are useful for storing composite 3-Dimensional objects. 
+Multipatch shapes are useful for storing composite 3-Dimensional objects. A MultiPatch shape represents a 3D object made up of one or more surface parts. Each surface in "parts" is defined by a list of XYZM values (Z and M values optional), and its corresponding type is -given in the "partTypes" argument. The part type decides how the coordinate sequence is to be interpreted, and can be one +given in the "partTypes" argument. The part type decides how the coordinate sequence is to be interpreted, and can be one of the following module constants: TRIANGLE_STRIP, TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. -For instance, a TRIANGLE_STRIP may be used to represent the walls of a building, combined with a TRIANGLE_FAN to represent -its roof: +For instance, a TRIANGLE_STRIP may be used to represent the walls of a building, combined with a TRIANGLE_FAN to represent +its roof: >>> from shapefile import TRIANGLE_STRIP, TRIANGLE_FAN - + >>> w = shapefile.Writer('shapefiles/test/multipatch') >>> w.field('name', 'C') - + >>> w.multipatch([ ... [[0,0,0],[0,0,3],[5,0,0],[5,0,3],[5,5,0],[5,5,3],[0,5,0],[0,5,3],[0,0,0],[0,0,3]], # TRIANGLE_STRIP for house walls ... [[2.5,2.5,5],[0,0,3],[5,0,3],[5,5,3],[0,5,3],[0,0,3]], # TRIANGLE_FAN for pointed house roof ... ], ... partTypes=[TRIANGLE_STRIP, TRIANGLE_FAN]) # one type for each part - + >>> w.record('house1') - + >>> w.close() - + For an introduction to the various multipatch part types and examples of how to create 3D MultiPatch objects see [this -ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). +ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). + - # Testing -The testing framework is pytest, and the tests are located in test_shapefile.py. -This includes an extensive set of unit tests of the various pyshp features, -and tests against various input data. 
 Some of the tests that require -internet connectivity will be skipped in offline testing environments. -In the same folder as README.md and shapefile.py, from the command line run +The testing framework is pytest, and the tests are located in test_shapefile.py. +This includes an extensive set of unit tests of the various pyshp features, +and tests against various input data. +In the same folder as README.md and shapefile.py, from the command line run + +```shell +python -m pytest ``` -$ python -m pytest -``` -Additionally, all the code and examples located in this file, README.md, +Additionally, all the code and examples located in this file, README.md, is tested and verified with the builtin doctest framework. A special routine for invoking the doctest is run when calling directly on shapefile.py. -In the same folder as README.md and shapefile.py, from the command line run +In the same folder as README.md and shapefile.py, from the command line run + +```shell +python shapefile.py +``` + +Linux/Mac and similar platforms may need to run `$ dos2unix README.md` in order +to correct line endings in README.md, if Git has not automatically changed them. + +## Network tests + +Some of the tests and doctests are intended to test reading shapefiles from +remote servers, which requires internet connectivity. The pytest tests are marked "network". 
+For rapid iteration, in CI, or when developing in offline testing environments, these +tests can be dealt with in two ways: + i) by skipping the network tests via: +```shell +pytest -m "not network" ``` -$ python shapefile.py -``` +or the doctests via: +```shell +python shapefile.py -m "not network" +``` +or ii) by cloning a repo of the files they download, serving these on localhost in a separate process, +and running the network tests with the environment variable REPLACE_REMOTE_URLS_WITH_LOCALHOST set to `yes`: +Set up a local file server (*): +``` +git clone http://github.com/JamesParrott/PyShp_test_shapefile +cd PyShp_test_shapefile +python -m http.server 8000 +``` +and then: +```bash +REPLACE_REMOTE_URLS_WITH_LOCALHOST=yes pytest +``` +or the doctests via: +```bash +REPLACE_REMOTE_URLS_WITH_LOCALHOST=yes python shapefile.py +``` +The network tests alone can also be run (without also running all the tests that don't +make network requests) using: `pytest -m network` (or the doctests using: `python shapefile.py -m network`). + +(*) The steps to host the files using Caddy for Python 2 are in ./actions/test/action.yml. For reasons as +yet unknown, shapefile.py's Reader class in Python 2 Pytest can't connect to a Python 2 SimpleHTTPServer. -Linux/Mac and similar platforms will need to run `$ dos2unix README.md` in order -to correct line endings in README.md. 
# Contributors @@ -1444,11 +1519,14 @@ fiveham geospatialpython Hannes Ignacio Martinez Vazquez +James Parrott Jason Moujaes Jonty Wareing Karim Bahgat karanrn +Kurt Schwehr Kyle Kelley +Lionel Guez Louis Tiao Marcin Cuprjak mcuprjak diff --git a/changelog.txt b/changelog.txt index 1735183f..533d704e 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,271 +1,271 @@ - -VERSION 2.3.1 - -2022-07-28 - Bug fixes: - * Fix recently introduced issue where Reader/Writer closes file-like objects provided by user (#244) - -VERSION 2.3.0 - -2022-04-30 - New Features: - * Added support for pathlib and path-like shapefile filepaths (@mwtoews). - * Allow reading individual file extensions via filepaths. - - Improvements: - * Simplified setup and deployment (@mwtoews) - * Faster shape access when missing shx file - * Switch to named logger (see #240) - - Bug fixes: - * More robust handling of corrupt shapefiles (fixes #235) - * Fix errors when writing to individual file-handles (fixes #237) - * Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) - * Fix test issues in environments without network access (@sebastic, @musicinmybrain). - -VERSION 2.2.0 - -2022-02-02 - New Features: - * Read shapefiles directly from zipfiles. - * Read shapefiles directly from urls. - * Allow fast extraction of only a subset of dbf fields through a `fields` arg. - * Allow fast filtering which shapes to read from the file through a `bbox` arg. - - Improvements: - * More examples and restructuring of README. - * More informative Shape to geojson warnings (see #219). - * Add shapefile.VERBOSE flag to control warnings verbosity (default True). - * Shape object information when calling repr(). - * Faster ring orientation checks, enforce geojson output ring orientation. - - Bug fixes: - * Remove null-padding at end of some record character fields. 
- * Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. - * Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) - * Fix bug where records and shapes would be assigned incorrect record number (@karanrn) - * Fix typos in docs (@timgates) - -VERSION 2.1.3 - -2021-01-14 - Bug fixes: - * Fix recent bug in geojson hole-in-polygon checking (see #205) - * Misc fixes to allow geo interface dump to json (eg dates as strings) - * Handle additional dbf date null values, and return faulty dates as unicode (see #187) - * Add writer target typecheck - * Fix bugs to allow reading shp/shx/dbf separately - * Allow delayed shapefile loading by passing no args - * Fix error with writing empty z/m shapefile (@mcuprjak) - * Fix signed_area() so ignores z/m coords - * Enforce writing the 11th field name character as null-terminator (only first 10 are used) - * Minor README fixes - * Added more tests - -VERSION 2.1.2 - -2020-09-10 - Bug fixes: - * Fix issue where `warnings.simplefilter('always')` changes global warning behavior [see #203] - -VERSION 2.1.1 - -2020-09-09 - Improvements: - * Handle shapes with no coords and represent as geojson with no coords (GeoJSON null-equivalent) - * Expand testing to Python 3.6, 3.7, 3.8 and PyPy; drop 3.3 and 3.4 [@mwtoews] - * Added pytest testing [@jmoujaes] - - Bug fixes: - * Fix incorrect geo interface handling of multipolygons with complex exterior-hole relations [see #202] - * Enforce shapefile requirement of at least one field, to avoid writing invalid shapefiles [@Jonty] - * Fix Reader geo interface including DeletionFlag field in feature properties [@nnseva] - * Fix polygons not being auto closed, which was accidentally dropped - * Fix error for null geometries in feature geojson - * Misc docstring cleanup [@fiveham] - -VERSION 2.1.0 - -2019-02-15 - New Features: - * Added back read/write support for unicode field names. 
- * Improved Record representation - * More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() - - Bug fixes: - * Fixed error when reading optional m-values - * Fixed Record attribute autocomplete in Python 3 - * Misc readme cleanup - -VERSION 2.0.1 - -2018-11-05 - * Fix pip install setup.py README decoding error. - -VERSION 2.0.0 - -2018-09-01 - (Note: Some contributor attributions may be missing.) - New Features: - * Full support for unicode text, with custom encoding, and exception handling. - - Means that the Reader returns unicode, and the Writer accepts unicode. - * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. - * Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. - * Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. - - Shapefiles can be iterated, have a length, and supports the geo interface. - - New ways of inspecing shapefile metadata by printing. [@megies] - - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] - * Add more support and documentation for MultiPatch 3D shapes. - * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. - * Better documentation of previously unclear aspects, such as field types. - - Bug Fixes: - * More reliable/robust: - - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. 
[@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. - - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] - * Fix some geo interface errors, including checking polygon directions. - * Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] - * Enforce maximum field limit. [@mwtoews] - -VERSION 1.2.12 - * ? - -VERSION 1.2.11 - -2017-04-29 Karim Bahgat - * Fixed bugs when reading and writing empty shapefiles. - * Fixed bug when writing null geometry. - * Fixed misc data type errors. - * Fixed error when reading files with wrong record length. - * Use max field precision when saving decimal numbers. - * Improved shapetype detection. - * Expanded docs on data types. - * General doc additions and travis icon. - -VERSION 1.2.10 - -2016-09-24 Karim Bahgat - * Bump version to fix pip install issue. - -VERSION 1.2.9 - -2016-09-22 Karim Bahgat - * Revert back to fix #66. - -VERSION 1.2.8 - -2016-08-17 Joel Lawhead - * Configured Travis-CI - -VERSION 1.2.5 - -2016-08-16 Joel Lawhead - * Reader speed up through batch unpacking bytes - * Merge README text into markdown file. Remove text version. 
- * Fixed parsing of number of points for some shapes (MULTIPOINTM, MULTIPOINTZ) - -VERSON 1.2.3 - -2015-06-21 Joel Lawhead - *shapefile.py (u) Bugfix for Python3 with Reader.iterShapeRecords() - -VERSION 1.2.2 - -### upcoming (2015/01/09 05:27 +00:00) -- [#11](https://github.com/geospatialpython/pyshp/pull/11) Merge pull request #11 from 7mp/master (@7mp) -- [#1](https://github.com/geospatialpython/pyshp/pull/1) Merge pull request #1 from rgbkrk/patch-1 (@rgbkrk) -- [#13](https://github.com/geospatialpython/pyshp/pull/13) Merge pull request #13 from jzmiller1/patch-1 (@jzmiller1) -- [#16](https://github.com/geospatialpython/pyshp/pull/16) Merge pull request #16 from riggsd/null-friendly (@riggsd) -- [#17](https://github.com/geospatialpython/pyshp/pull/17) Merge pull request #17 from riggsd/no-asserts (@riggsd) -- [#19](https://github.com/geospatialpython/pyshp/pull/19) Merge pull request #19 from razzius/master (@razzius) -- [#20](https://github.com/geospatialpython/pyshp/pull/20) Merge pull request #20 from Brideau/patch-1 (@Brideau) -- [12d69d4](https://github.com/GeospatialPython/pyshp/commit/12d69d47d8c90b445ea22bf5d9530b0c1c710de5) Updated to version 1.2.1 to match PyPI (@GeospatialPython) -- [05b69dc](https://github.com/GeospatialPython/pyshp/commit/05b69dc6b3d58c0dc9a822f6c4b8d45cf8dc9d94) Updated to version 1.2.1 to match PyPI (@GeospatialPython) -- [d2e9f1a](https://github.com/GeospatialPython/pyshp/commit/d2e9f1a41d02cf932484111f45c31781d1f7385a) Typo: recordsIter should be iterRecords (@Brideau) -- [a965aff](https://github.com/GeospatialPython/pyshp/commit/a965aff230aa3f3b85016f7b627609c7e53a2cf9) Format README code sample (@razzius) -- [66e1802](https://github.com/GeospatialPython/pyshp/commit/66e1802013fd3535baa505e15625afaa895ef819) Raise ShapefileException for runtime errors rather than using `assert()` -- [d72723c](https://github.com/GeospatialPython/pyshp/commit/d72723c9e38db8e859b79d95a65c00af1c2ba8ba) Return None when parsing (illegal) NULL 
attribute values in numeric and date fields, like those produced by QGIS -- [783e68c](https://github.com/GeospatialPython/pyshp/commit/783e68c75b8f20c7656ea470dbc5e9496a8ee0ac) Update link to "XBase File Format Description" (@jzmiller1) -- [79cc409](https://github.com/GeospatialPython/pyshp/commit/79cc409362a24caf4a21923419490ee95d557dc3) Added `Reader.iterShapeRecords` to help work with larger files -- [18c5521](https://github.com/GeospatialPython/pyshp/commit/18c5521b89cd1d7968dff8eb03c1ec37ab4307c5) URL Change (@rgbkrk) -- [202143c](https://github.com/GeospatialPython/pyshp/commit/202143c823407ffea07b5400e77b9ded7169f696) README.md TOC Take 2 -- [2cca75c](https://github.com/GeospatialPython/pyshp/commit/2cca75cd09b27bb19a77ffeb68afc535e3c33802) README.md TOC -- [8b5e994](https://github.com/GeospatialPython/pyshp/commit/8b5e994905fd4a70c0f7ce6d814346e6666b280c) README.md -- [f31a3d7](https://github.com/GeospatialPython/pyshp/commit/f31a3d773dd22e65d3e38ad8b034f186a05b4c4d) Update README.txt (@GeospatialPython) - -VERSION 1.2.1 - -2014-05-11 Joel Lawhead - *shapefile.py (u) fixed bug which failed to properly read some dbf fields in Python 3 - -VERSION 1.2.0 - -2013-09-05 Joel Lawhead - *README.txt add example/test for writing a 3D polygon - -VERSION 1.1.9 - -2013-07-27 Joel Lawhead - *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer - when referencing "z" and "m" values. This bug caused errors only when editing - 3D shapefiles. - -VERSION 1.1.8 - -2013-07-02 Joel Lawhead - *shapefile.py (Writer.poly()) fixed a bug that resulted in incorrect part indexes - *README.txt updated several errors in the documentation. - -2013-06-25 Joel Lawhead - *shapefile.py (Reader.shapes(),Reader.iterShapes()) Updated to verify the file length by - seeking to the end. A user reported shapefiles in the wild which had incorrect .shp file - lengths reported in the header which crashed when reading or iterating shapes. 
Most - insist on using the .shx file but there's no real reason to do so. - -VERSION 1.1.7 - -2013-06-22 Joel Lawhead - - *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention - to export shapefiles as GeoJSON. - - *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed - as unicode strings. - - *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through - geometry records for parsing large files efficiently. - - *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through - dbf records efficiently in large files. - - *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx - file is not available. - - *shapefile.py (main) Added __version__ attribute. - - *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to - dbf fields. - - *shapefile.py (Reader.__shape) Updated to calculate and seek the start of the next record. The - shapefile spec does not require the content of a geometry record to be as long as the content - length defined in the header. The result is you can delete features without modifying the - record header allowing for empty space in records. - - *shapefile.py (Writer.poly) Added enforcement of closed polygons - - *shapefile.py (Writer.save) Added unique file name generator to use if no file names are passed - to a writer instance when saving (ex. w.save()). The unique file name is returned as a string. - - *README.txt (main) Added tests for iterShapes(), iterRecords(), __geo_interface__() - - *README.txt (main) Updated "bbox" property documentation to match Esri specification. - - - + +VERSION 2.3.1 + +2022-07-28 + Bug fixes: + * Fix recently introduced issue where Reader/Writer closes file-like objects provided by user (#244) + +VERSION 2.3.0 + +2022-04-30 + New Features: + * Added support for pathlib and path-like shapefile filepaths (@mwtoews). + * Allow reading individual file extensions via filepaths. 
+ + Improvements: + * Simplified setup and deployment (@mwtoews) + * Faster shape access when missing shx file + * Switch to named logger (see #240) + + Bug fixes: + * More robust handling of corrupt shapefiles (fixes #235) + * Fix errors when writing to individual file-handles (fixes #237) + * Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) + * Fix test issues in environments without network access (@sebastic, @musicinmybrain). + +VERSION 2.2.0 + +2022-02-02 + New Features: + * Read shapefiles directly from zipfiles. + * Read shapefiles directly from urls. + * Allow fast extraction of only a subset of dbf fields through a `fields` arg. + * Allow fast filtering which shapes to read from the file through a `bbox` arg. + + Improvements: + * More examples and restructuring of README. + * More informative Shape to geojson warnings (see #219). + * Add shapefile.VERBOSE flag to control warnings verbosity (default True). + * Shape object information when calling repr(). + * Faster ring orientation checks, enforce geojson output ring orientation. + + Bug fixes: + * Remove null-padding at end of some record character fields. + * Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. 
+ * Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) + * Fix bug where records and shapes would be assigned incorrect record number (@karanrn) + * Fix typos in docs (@timgates) + +VERSION 2.1.3 + +2021-01-14 + Bug fixes: + * Fix recent bug in geojson hole-in-polygon checking (see #205) + * Misc fixes to allow geo interface dump to json (eg dates as strings) + * Handle additional dbf date null values, and return faulty dates as unicode (see #187) + * Add writer target typecheck + * Fix bugs to allow reading shp/shx/dbf separately + * Allow delayed shapefile loading by passing no args + * Fix error with writing empty z/m shapefile (@mcuprjak) + * Fix signed_area() so ignores z/m coords + * Enforce writing the 11th field name character as null-terminator (only first 10 are used) + * Minor README fixes + * Added more tests + +VERSION 2.1.2 + +2020-09-10 + Bug fixes: + * Fix issue where `warnings.simplefilter('always')` changes global warning behavior [see #203] + +VERSION 2.1.1 + +2020-09-09 + Improvements: + * Handle shapes with no coords and represent as geojson with no coords (GeoJSON null-equivalent) + * Expand testing to Python 3.6, 3.7, 3.8 and PyPy; drop 3.3 and 3.4 [@mwtoews] + * Added pytest testing [@jmoujaes] + + Bug fixes: + * Fix incorrect geo interface handling of multipolygons with complex exterior-hole relations [see #202] + * Enforce shapefile requirement of at least one field, to avoid writing invalid shapefiles [@Jonty] + * Fix Reader geo interface including DeletionFlag field in feature properties [@nnseva] + * Fix polygons not being auto closed, which was accidentally dropped + * Fix error for null geometries in feature geojson + * Misc docstring cleanup [@fiveham] + +VERSION 2.1.0 + +2019-02-15 + New Features: + * Added back read/write support for unicode field names. 
+ * Improved Record representation + * More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() + + Bug fixes: + * Fixed error when reading optional m-values + * Fixed Record attribute autocomplete in Python 3 + * Misc readme cleanup + +VERSION 2.0.1 + +2018-11-05 + * Fix pip install setup.py README decoding error. + +VERSION 2.0.0 + +2018-09-01 + (Note: Some contributor attributions may be missing.) + New Features: + * Full support for unicode text, with custom encoding, and exception handling. + - Means that the Reader returns unicode, and the Writer accepts unicode. + * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. + * Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: + - Specify filepath/destination and text encoding when creating the Writer. + - The file is written incrementally with each call to shape/record. + - Adding shapes is now done using dedicated methods for each shapetype. + * Reading shapefiles is now more convenient: + - Shapefiles can be opened using the context manager, and files are properly closed. + - Shapefiles can be iterated, have a length, and support the geo interface. + - New ways of inspecting shapefile metadata by printing. [@megies] + - More convenient accessing of Record values as attributes. [@philippkraft] + - More convenient shape type name checking. [@megies] + * Add more support and documentation for MultiPatch 3D shapes. + * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. + * Better documentation of previously unclear aspects, such as field types. + + Bug Fixes: + * More reliable/robust: + - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] + - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. 
[@ShinNoNoir] + - Improved parsing of field value types, fixed errors and made more flexible. + - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] + * Fix some geo interface errors, including checking polygon directions. + * Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] + * Enforce maximum field limit. [@mwtoews] + +VERSION 1.2.12 + * ? + +VERSION 1.2.11 + +2017-04-29 Karim Bahgat + * Fixed bugs when reading and writing empty shapefiles. + * Fixed bug when writing null geometry. + * Fixed misc data type errors. + * Fixed error when reading files with wrong record length. + * Use max field precision when saving decimal numbers. + * Improved shapetype detection. + * Expanded docs on data types. + * General doc additions and travis icon. + +VERSION 1.2.10 + +2016-09-24 Karim Bahgat + * Bump version to fix pip install issue. + +VERSION 1.2.9 + +2016-09-22 Karim Bahgat + * Revert back to fix #66. + +VERSION 1.2.8 + +2016-08-17 Joel Lawhead + * Configured Travis-CI + +VERSION 1.2.5 + +2016-08-16 Joel Lawhead + * Reader speed up through batch unpacking bytes + * Merge README text into markdown file. Remove text version. 
+ * Fixed parsing of number of points for some shapes (MULTIPOINTM, MULTIPOINTZ) + +VERSION 1.2.3 + +2015-06-21 Joel Lawhead + *shapefile.py (u) Bugfix for Python3 with Reader.iterShapeRecords() + +VERSION 1.2.2 + +### upcoming (2015/01/09 05:27 +00:00) +- [#11](https://github.com/geospatialpython/pyshp/pull/11) Merge pull request #11 from 7mp/master (@7mp) +- [#1](https://github.com/geospatialpython/pyshp/pull/1) Merge pull request #1 from rgbkrk/patch-1 (@rgbkrk) +- [#13](https://github.com/geospatialpython/pyshp/pull/13) Merge pull request #13 from jzmiller1/patch-1 (@jzmiller1) +- [#16](https://github.com/geospatialpython/pyshp/pull/16) Merge pull request #16 from riggsd/null-friendly (@riggsd) +- [#17](https://github.com/geospatialpython/pyshp/pull/17) Merge pull request #17 from riggsd/no-asserts (@riggsd) +- [#19](https://github.com/geospatialpython/pyshp/pull/19) Merge pull request #19 from razzius/master (@razzius) +- [#20](https://github.com/geospatialpython/pyshp/pull/20) Merge pull request #20 from Brideau/patch-1 (@Brideau) +- [12d69d4](https://github.com/GeospatialPython/pyshp/commit/12d69d47d8c90b445ea22bf5d9530b0c1c710de5) Updated to version 1.2.1 to match PyPI (@GeospatialPython) +- [05b69dc](https://github.com/GeospatialPython/pyshp/commit/05b69dc6b3d58c0dc9a822f6c4b8d45cf8dc9d94) Updated to version 1.2.1 to match PyPI (@GeospatialPython) +- [d2e9f1a](https://github.com/GeospatialPython/pyshp/commit/d2e9f1a41d02cf932484111f45c31781d1f7385a) Typo: recordsIter should be iterRecords (@Brideau) +- [a965aff](https://github.com/GeospatialPython/pyshp/commit/a965aff230aa3f3b85016f7b627609c7e53a2cf9) Format README code sample (@razzius) +- [66e1802](https://github.com/GeospatialPython/pyshp/commit/66e1802013fd3535baa505e15625afaa895ef819) Raise ShapefileException for runtime errors rather than using `assert()` +- [d72723c](https://github.com/GeospatialPython/pyshp/commit/d72723c9e38db8e859b79d95a65c00af1c2ba8ba) Return None when parsing (illegal) NULL 
attribute values in numeric and date fields, like those produced by QGIS +- [783e68c](https://github.com/GeospatialPython/pyshp/commit/783e68c75b8f20c7656ea470dbc5e9496a8ee0ac) Update link to "XBase File Format Description" (@jzmiller1) +- [79cc409](https://github.com/GeospatialPython/pyshp/commit/79cc409362a24caf4a21923419490ee95d557dc3) Added `Reader.iterShapeRecords` to help work with larger files +- [18c5521](https://github.com/GeospatialPython/pyshp/commit/18c5521b89cd1d7968dff8eb03c1ec37ab4307c5) URL Change (@rgbkrk) +- [202143c](https://github.com/GeospatialPython/pyshp/commit/202143c823407ffea07b5400e77b9ded7169f696) README.md TOC Take 2 +- [2cca75c](https://github.com/GeospatialPython/pyshp/commit/2cca75cd09b27bb19a77ffeb68afc535e3c33802) README.md TOC +- [8b5e994](https://github.com/GeospatialPython/pyshp/commit/8b5e994905fd4a70c0f7ce6d814346e6666b280c) README.md +- [f31a3d7](https://github.com/GeospatialPython/pyshp/commit/f31a3d773dd22e65d3e38ad8b034f186a05b4c4d) Update README.txt (@GeospatialPython) + +VERSION 1.2.1 + +2014-05-11 Joel Lawhead + *shapefile.py (u) fixed bug which failed to properly read some dbf fields in Python 3 + +VERSION 1.2.0 + +2013-09-05 Joel Lawhead + *README.txt add example/test for writing a 3D polygon + +VERSION 1.1.9 + +2013-07-27 Joel Lawhead + *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer + when referencing "z" and "m" values. This bug caused errors only when editing + 3D shapefiles. + +VERSION 1.1.8 + +2013-07-02 Joel Lawhead + *shapefile.py (Writer.poly()) fixed a bug that resulted in incorrect part indexes + *README.txt updated several errors in the documentation. + +2013-06-25 Joel Lawhead + *shapefile.py (Reader.shapes(),Reader.iterShapes()) Updated to verify the file length by + seeking to the end. A user reported shapefiles in the wild which had incorrect .shp file + lengths reported in the header which crashed when reading or iterating shapes. 
Most + insist on using the .shx file but there's no real reason to do so. + +VERSION 1.1.7 + +2013-06-22 Joel Lawhead + + *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention + to export shapefiles as GeoJSON. + + *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed + as unicode strings. + + *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through + geometry records for parsing large files efficiently. + + *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through + dbf records efficiently in large files. + + *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx + file is not available. + + *shapefile.py (main) Added __version__ attribute. + + *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to + dbf fields. + + *shapefile.py (Reader.__shape) Updated to calculate and seek the start of the next record. The + shapefile spec does not require the content of a geometry record to be as long as the content + length defined in the header. The result is you can delete features without modifying the + record header allowing for empty space in records. + + *shapefile.py (Writer.poly) Added enforcement of closed polygons + + *shapefile.py (Writer.save) Added unique file name generator to use if no file names are passed + to a writer instance when saving (ex. w.save()). The unique file name is returned as a string. + + *README.txt (main) Added tests for iterShapes(), iterRecords(), __geo_interface__() + + *README.txt (main) Updated "bbox" property documentation to match Esri specification. + + + diff --git a/pyproject.toml b/pyproject.toml index fed528d4..df8e737f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,89 @@ [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" + + +[tool.ruff] +# Exclude a variety of commonly ignored directories. 
+exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.7 +target-version = "py37" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix` is provided). +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. 
+line-ending = "auto" + + + +[tool.pylint.MASTER] +load-plugins=[ + "pylint_per_file_ignores", +] + +[tool.pylint.'MESSAGES CONTROL'] +# Silence warning: shapefile.py:2076:20: W0212: Access to a protected +# member _from_geojson of a client class (protected-access) +# +# Silence warnings: test_shapefile.py:{783,786,799,803,06,1195}:19: +# W0212: Access to a protected member _offsets of a +# client class (protected-access) +# +# Toml multi-line string used instead of array due to: +# https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 +per-file-ignores = """ + shapefile.py:W0212 + test_shapefile.py:W0212 +""" \ No newline at end of file diff --git a/requirements.test.txt b/requirements.test.txt index b3eaa8c8..11141738 100644 --- a/requirements.test.txt +++ b/requirements.test.txt @@ -1,2 +1,2 @@ -pytest==3.2.5 +pytest >= 3.7 setuptools diff --git a/shapefile.py b/shapefile.py index 04fb5ec0..211fd48f 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1,2671 +1,2945 @@ -""" -shapefile.py -Provides read and write support for ESRI Shapefiles. 
-authors: jlawheadgeospatialpython.com -maintainer: karim.bahgat.norwaygmail.com -Compatible with Python versions 2.7-3.x -""" - -__version__ = "2.3.1" - -from struct import pack, unpack, calcsize, error, Struct -import os -import sys -import time -import array -import tempfile -import logging -import io -from datetime import date -import zipfile - -# Create named logger -logger = logging.getLogger(__name__) - - -# Module settings -VERBOSE = True - -# Constants for shape types -NULL = 0 -POINT = 1 -POLYLINE = 3 -POLYGON = 5 -MULTIPOINT = 8 -POINTZ = 11 -POLYLINEZ = 13 -POLYGONZ = 15 -MULTIPOINTZ = 18 -POINTM = 21 -POLYLINEM = 23 -POLYGONM = 25 -MULTIPOINTM = 28 -MULTIPATCH = 31 - -SHAPETYPE_LOOKUP = { - 0: 'NULL', - 1: 'POINT', - 3: 'POLYLINE', - 5: 'POLYGON', - 8: 'MULTIPOINT', - 11: 'POINTZ', - 13: 'POLYLINEZ', - 15: 'POLYGONZ', - 18: 'MULTIPOINTZ', - 21: 'POINTM', - 23: 'POLYLINEM', - 25: 'POLYGONM', - 28: 'MULTIPOINTM', - 31: 'MULTIPATCH'} - -TRIANGLE_STRIP = 0 -TRIANGLE_FAN = 1 -OUTER_RING = 2 -INNER_RING = 3 -FIRST_RING = 4 -RING = 5 - -PARTTYPE_LOOKUP = { - 0: 'TRIANGLE_STRIP', - 1: 'TRIANGLE_FAN', - 2: 'OUTER_RING', - 3: 'INNER_RING', - 4: 'FIRST_RING', - 5: 'RING'} - - -# Python 2-3 handling - -PYTHON3 = sys.version_info[0] == 3 - -if PYTHON3: - xrange = range - izip = zip - - from urllib.parse import urlparse, urlunparse - from urllib.error import HTTPError - from urllib.request import urlopen, Request - -else: - from itertools import izip - - from urlparse import urlparse, urlunparse - from urllib2 import HTTPError - from urllib2 import urlopen, Request - - -# Helpers - -MISSING = [None,''] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. - -if PYTHON3: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. 
- return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return b"" - else: - # Force string representation. - return str(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - elif isinstance(v, str): - # Already str. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, str) - -else: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, unicode): - # For python 2 encode unicode to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return unicode(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 2 decode bytes to unicode. - return v.decode(encoding, encodingErrors) - elif isinstance(v, unicode): - # Already unicode. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return u"" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, basestring) - -if sys.version_info[0:2] >= (3, 6): - def pathlike_obj(path): - if isinstance(path, os.PathLike): - return os.fsdecode(path) - else: - return path -else: - def pathlike_obj(path): - if is_string(path): - return path - elif hasattr(path, "__fspath__"): - return path.__fspath__() - else: - try: - return str(path) - except: - return path - - -# Begin - -class _Array(array.array): - """Converts python tuples to lists of the appropriate type. 
- Used to unpack different shapefile header parts.""" - def __repr__(self): - return str(self.tolist()) - -def signed_area(coords, fast=False): - """Return the signed area enclosed by a ring using the linear time - algorithm. A value >= 0 indicates a counter-clockwise oriented ring. - A faster version is possible by setting 'fast' to True, which returns - 2x the area, e.g. if you're only interested in the sign of the area. - """ - xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values - xs.append(xs[1]) - ys.append(ys[1]) - area2 = sum(xs[i]*(ys[i+1]-ys[i-1]) for i in range(1, len(coords))) - if fast: - return area2 - else: - return area2 / 2.0 - -def is_cw(coords): - """Returns True if a polygon ring has clockwise orientation, determined - by a negatively signed area. - """ - area2 = signed_area(coords, fast=True) - return area2 < 0 - -def rewind(coords): - """Returns the input coords in reversed order. - """ - return list(reversed(coords)) - -def ring_bbox(coords): - """Calculates and returns the bounding box of a ring. - """ - xs,ys = zip(*coords) - bbox = min(xs),min(ys),max(xs),max(ys) - return bbox - -def bbox_overlap(bbox1, bbox2): - """Tests whether two bounding boxes overlap, returning a boolean - """ - xmin1,ymin1,xmax1,ymax1 = bbox1 - xmin2,ymin2,xmax2,ymax2 = bbox2 - overlap = (xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2) - return overlap - -def bbox_contains(bbox1, bbox2): - """Tests whether bbox1 fully contains bbox2, returning a boolean - """ - xmin1,ymin1,xmax1,ymax1 = bbox1 - xmin2,ymin2,xmax2,ymax2 = bbox2 - contains = (xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2) - return contains - -def ring_contains_point(coords, p): - """Fast point-in-polygon crossings algorithm, MacMartin optimization. - - Adapted from code by Eric Haynes - http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c - - Original description: - Shoot a test ray along +X axis. 
The strategy, from MacMartin, is to - compare vertex Y values to the testing point's Y and quickly discard - edges which are entirely to one side of the test ray. - """ - tx,ty = p - - # get initial test bit for above/below X axis - vtx0 = coords[0] - yflag0 = ( vtx0[1] >= ty ) - - inside_flag = False - for vtx1 in coords[1:]: - yflag1 = ( vtx1[1] >= ty ) - # check if endpoints straddle (are on opposite sides) of X axis - # (i.e. the Y's differ); if so, +X ray could intersect this edge. - if yflag0 != yflag1: - xflag0 = ( vtx0[0] >= tx ) - # check if endpoints are on same side of the Y axis (i.e. X's - # are the same); if so, it's easy to test if edge hits or misses. - if xflag0 == ( vtx1[0] >= tx ): - # if edge's X values both right of the point, must hit - if xflag0: - inside_flag = not inside_flag - else: - # compute intersection of pgon segment with +X ray, note - # if >= point's X; if so, the ray hits it. - if ( vtx1[0] - (vtx1[1]-ty) * ( vtx0[0]-vtx1[0]) / (vtx0[1]-vtx1[1]) ) >= tx: - inside_flag = not inside_flag - - # move to next pair of vertices, retaining info as possible - yflag0 = yflag1 - vtx0 = vtx1 - - return inside_flag - -def ring_sample(coords, ccw=False): - """Return a sample point guaranteed to be within a ring, by efficiently - finding the first centroid of a coordinate triplet whose orientation - matches the orientation of the ring and passes the point-in-ring test. - The orientation of the ring is assumed to be clockwise, unless ccw - (counter-clockwise) is set to True. 
- """ - triplet = [] - def itercoords(): - # iterate full closed ring - for p in coords: - yield p - # finally, yield the second coordinate to the end to allow checking the last triplet - yield coords[1] - - for p in itercoords(): - # add point to triplet (but not if duplicate) - if p not in triplet: - triplet.append(p) - - # new triplet, try to get sample - if len(triplet) == 3: - # check that triplet does not form a straight line (not a triangle) - is_straight_line = (triplet[0][1] - triplet[1][1]) * (triplet[0][0] - triplet[2][0]) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) - if not is_straight_line: - # get triplet orientation - closed_triplet = triplet + [triplet[0]] - triplet_ccw = not is_cw(closed_triplet) - # check that triplet has the same orientation as the ring (means triangle is inside the ring) - if ccw == triplet_ccw: - # get triplet centroid - xs,ys = zip(*triplet) - xmean,ymean = sum(xs) / 3.0, sum(ys) / 3.0 - # check that triplet centroid is truly inside the ring - if ring_contains_point(coords, (xmean,ymean)): - return xmean,ymean - - # failed to get sample point from this triplet - # remove oldest triplet coord to allow iterating to next triplet - triplet.pop(0) - - else: - raise Exception('Unexpected error: Unable to find a ring sample point.') - -def ring_contains_ring(coords1, coords2): - '''Returns True if all vertexes in coords2 are fully inside coords1. - ''' - return all((ring_contains_point(coords1, p2) for p2 in coords2)) - -def organize_polygon_rings(rings, return_errors=None): - '''Organize a list of coordinate rings into one or more polygons with holes. - Returns a list of polygons, where each polygon is composed of a single exterior - ring, and one or more interior holes. If a return_errors dict is provided (optional), - any errors encountered will be added to it. - - Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). 
- Rings are determined as exteriors if they run in clockwise direction, or interior - holes if they run in counter-clockwise direction. This method is used to construct - GeoJSON (multi)polygons from the shapefile polygon shape type, which does not - explicitly store the structure of the polygons beyond exterior/interior ring orientation. - ''' - # first iterate rings and classify as exterior or hole - exteriors = [] - holes = [] - for ring in rings: - # shapefile format defines a polygon as a sequence of rings - # where exterior rings are clockwise, and holes counterclockwise - if is_cw(ring): - # ring is exterior - exteriors.append(ring) - else: - # ring is a hole - holes.append(ring) - - # if only one exterior, then all holes belong to that exterior - if len(exteriors) == 1: - # exit early - poly = [exteriors[0]] + holes - polys = [poly] - return polys - - # multiple exteriors, ie multi-polygon, have to group holes with correct exterior - # shapefile format does not specify which holes belong to which exteriors - # so have to do efficient multi-stage checking of hole-to-exterior containment - elif len(exteriors) > 1: - # exit early if no holes - if not holes: - polys = [] - for ext in exteriors: - poly = [ext] - polys.append(poly) - return polys - - # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = dict([(hole_i,[]) for hole_i in xrange(len(holes))]) - exterior_bboxes = [ring_bbox(ring) for ring in exteriors] - for hole_i in hole_exteriors.keys(): - hole_bbox = ring_bbox(holes[hole_i]) - for ext_i,ext_bbox in enumerate(exterior_bboxes): - if bbox_contains(ext_bbox, hole_bbox): - hole_exteriors[hole_i].append( ext_i ) - - # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test - for hole_i,exterior_candidates in hole_exteriors.items(): - - if len(exterior_candidates) > 1: - # get hole sample point - ccw = not is_cw(holes[hole_i]) - hole_sample = 
ring_sample(holes[hole_i], ccw=ccw) - # collect new exterior candidates - new_exterior_candidates = [] - for ext_i in exterior_candidates: - # check that hole sample point is inside exterior - hole_in_exterior = ring_contains_point(exteriors[ext_i], hole_sample) - if hole_in_exterior: - new_exterior_candidates.append(ext_i) - - # set new exterior candidates - hole_exteriors[hole_i] = new_exterior_candidates - - # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole - for hole_i,exterior_candidates in hole_exteriors.items(): - - if len(exterior_candidates) > 1: - # exterior candidate with the smallest area is the hole's most immediate parent - ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x], fast=True)))[0] - hole_exteriors[hole_i] = [ext_i] - - # separate out holes that are orphaned (not contained by any exterior) - orphan_holes = [] - for hole_i,exterior_candidates in list(hole_exteriors.items()): - if not exterior_candidates: - orphan_holes.append( hole_i ) - del hole_exteriors[hole_i] - continue - - # each hole should now only belong to one exterior, group into exterior-holes polygons - polys = [] - for ext_i,ext in enumerate(exteriors): - poly = [ext] - # find relevant holes - poly_holes = [] - for hole_i,exterior_candidates in list(hole_exteriors.items()): - # hole is relevant if previously matched with this exterior - if exterior_candidates[0] == ext_i: - poly_holes.append( holes[hole_i] ) - poly += poly_holes - polys.append(poly) - - # add orphan holes as exteriors - for hole_i in orphan_holes: - ext = holes[hole_i] - # add as single exterior without any holes - poly = [ext] - polys.append(poly) - - if orphan_holes and return_errors is not None: - return_errors['polygon_orphaned_holes'] = len(orphan_holes) - - return polys - - # no exteriors, be nice and assume due to incorrect winding order - else: - if return_errors is not None: - 
return_errors['polygon_only_holes'] = len(holes) - exteriors = holes - # add as single exterior without any holes - polys = [[ext] for ext in exteriors] - return polys - -class Shape(object): - def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None): - """Stores the geometry of the different shape types - specified in the Shapefile spec. Shape types are - usually point, polyline, or polygons. Every shape type - except the "Null" type contains points at some level for - example vertices in a polygon. If a shape type has - multiple shapes containing points within a single - geometry record then those shapes are called parts. Parts - are designated by their starting index in geometry record's - list of shapes. For MultiPatch geometry, partTypes designates - the patch type of each of the parts. - """ - self.shapeType = shapeType - self.points = points or [] - self.parts = parts or [] - if partTypes: - self.partTypes = partTypes - - # and a dict to silently record any errors encountered - self._errors = {} - - # add oid - if oid is not None: - self.__oid = oid - else: - self.__oid = -1 - - @property - def __geo_interface__(self): - if self.shapeType in [POINT, POINTM, POINTZ]: - # point - if len(self.points) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'Point', 'coordinates':tuple()} - else: - return { - 'type': 'Point', - 'coordinates': tuple(self.points[0]) - } - elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: - if len(self.points) == 0: - # the shape has no coordinate information, i.e. 
is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'MultiPoint', 'coordinates':[]} - else: - # multipoint - return { - 'type': 'MultiPoint', - 'coordinates': [tuple(p) for p in self.points] - } - elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'LineString', 'coordinates':[]} - elif len(self.parts) == 1: - # linestring - return { - 'type': 'LineString', - 'coordinates': [tuple(p) for p in self.points] - } - else: - # multilinestring - ps = None - coordinates = [] - for part in self.parts: - if ps == None: - ps = part - continue - else: - coordinates.append([tuple(p) for p in self.points[ps:part]]) - ps = part - else: - coordinates.append([tuple(p) for p in self.points[part:]]) - return { - 'type': 'MultiLineString', - 'coordinates': coordinates - } - elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'Polygon', 'coordinates':[]} - else: - # get all polygon rings - rings = [] - for i in xrange(len(self.parts)): - # get indexes of start and end points of the ring - start = self.parts[i] - try: - end = self.parts[i+1] - except IndexError: - end = len(self.points) - - # extract the points that make up the ring - ring = [tuple(p) for p in self.points[start:end]] - rings.append(ring) - - # organize rings into list of polygons, where each polygon is defined as list of rings. 
- # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). - polys = organize_polygon_rings(rings, self._errors) - - # if VERBOSE is True, issue detailed warning about any shape errors - # encountered during the Shapefile to GeoJSON conversion - if VERBOSE and self._errors: - header = 'Possible issue encountered when converting Shape #{} to GeoJSON: '.format(self.oid) - orphans = self._errors.get('polygon_orphaned_holes', None) - if orphans: - msg = header + 'Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ -but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ -orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ -encoded as GeoJSON exterior rings instead of holes.' - logger.warning(msg) - only_holes = self._errors.get('polygon_only_holes', None) - if only_holes: - msg = header + 'Shapefile format requires that polygons contain at least one exterior ring, \ -but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ -still included but were encoded as GeoJSON exterior rings instead of holes.' - logger.warning(msg) - - # return as geojson - if len(polys) == 1: - return { - 'type': 'Polygon', - 'coordinates': polys[0] - } - else: - return { - 'type': 'MultiPolygon', - 'coordinates': polys - } - - else: - raise Exception('Shape type "%s" cannot be represented as GeoJSON.' 
% SHAPETYPE_LOOKUP[self.shapeType]) - - @staticmethod - def _from_geojson(geoj): - # create empty shape - shape = Shape() - # set shapeType - geojType = geoj["type"] if geoj else "Null" - if geojType == "Null": - shapeType = NULL - elif geojType == "Point": - shapeType = POINT - elif geojType == "LineString": - shapeType = POLYLINE - elif geojType == "Polygon": - shapeType = POLYGON - elif geojType == "MultiPoint": - shapeType = MULTIPOINT - elif geojType == "MultiLineString": - shapeType = POLYLINE - elif geojType == "MultiPolygon": - shapeType = POLYGON - else: - raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) - shape.shapeType = shapeType - - # set points and parts - if geojType == "Point": - shape.points = [ geoj["coordinates"] ] - shape.parts = [0] - elif geojType in ("MultiPoint","LineString"): - shape.points = geoj["coordinates"] - shape.parts = [0] - elif geojType in ("Polygon"): - points = [] - parts = [] - index = 0 - for i,ext_or_hole in enumerate(geoj["coordinates"]): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. 
- if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - elif geojType in ("MultiLineString"): - points = [] - parts = [] - index = 0 - for linestring in geoj["coordinates"]: - points.extend(linestring) - parts.append(index) - index += len(linestring) - shape.points = points - shape.parts = parts - elif geojType in ("MultiPolygon"): - points = [] - parts = [] - index = 0 - for polygon in geoj["coordinates"]: - for i,ext_or_hole in enumerate(polygon): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. - if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - return shape - - @property - def oid(self): - """The index position of the shape in the original shapefile""" - return self.__oid - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def __repr__(self): - return 'Shape #{}: {}'.format(self.__oid, self.shapeTypeName) - -class _Record(list): - """ - A class to hold a record. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, the values of the record - can also be retrieved using the field's name. For example if the dbf contains - a field ID at position 0, the ID can be retrieved with the position, the field name - as a key, or the field name as an attribute. 
- - >>> # Create a Record with one field, normally the record is created by the Reader class - >>> r = _Record({'ID': 0}, [0]) - >>> print(r[0]) - >>> print(r['ID']) - >>> print(r.ID) - """ - - def __init__(self, field_positions, values, oid=None): - """ - A Record should be created by the Reader class - - :param field_positions: A dict mapping field names to field positions - :param values: A sequence of values - :param oid: The object id, an int (optional) - """ - self.__field_positions = field_positions - if oid is not None: - self.__oid = oid - else: - self.__oid = -1 - list.__init__(self, values) - - def __getattr__(self, item): - """ - __getattr__ is called if an attribute is used that does - not exist in the normal sense. For example r=Record(...), r.ID - calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5) - :param item: The field name, used as attribute - :return: Value of the field - :raises: AttributeError, if item is not a field of the shapefile - and IndexError, if the field exists but the field's - corresponding value in the Record does not exist - """ - try: - index = self.__field_positions[item] - return list.__getitem__(self, index) - except KeyError: - raise AttributeError('{} is not a field name'.format(item)) - except IndexError: - raise IndexError('{} found as a field but not enough values available.'.format(item)) - - def __setattr__(self, key, value): - """ - Sets a value of a field attribute - :param key: The field name - :param value: the value of that field - :return: None - :raises: AttributeError, if key is not a field of the shapefile - """ - if key.startswith('_'): # Prevent infinite loop when setting mangled attribute - return list.__setattr__(self, key, value) - try: - index = self.__field_positions[key] - return list.__setitem__(self, index, value) - except KeyError: - raise AttributeError('{} is not a field name'.format(key)) - - def __getitem__(self, item): - """ - Extends the normal list item access with - access 
using a fieldname - - For example r['ID'], r[0] - :param item: Either the position of the value or the name of a field - :return: the value of the field - """ - try: - return list.__getitem__(self, item) - except TypeError: - try: - index = self.__field_positions[item] - except KeyError: - index = None - if index is not None: - return list.__getitem__(self, index) - else: - raise IndexError('"{}" is not a field name and not an int'.format(item)) - - def __setitem__(self, key, value): - """ - Extends the normal list item access with - access using a fieldname - - For example r['ID']=2, r[0]=2 - :param key: Either the position of the value or the name of a field - :param value: the new value of the field - """ - try: - return list.__setitem__(self, key, value) - except TypeError: - index = self.__field_positions.get(key) - if index is not None: - return list.__setitem__(self, index, value) - else: - raise IndexError('{} is not a field name and not an int'.format(key)) - - @property - def oid(self): - """The index position of the record in the original shapefile""" - return self.__oid - - def as_dict(self, date_strings=False): - """ - Returns this Record as a dictionary using the field names as keys - :return: dict - """ - dct = dict((f, self[i]) for f, i in self.__field_positions.items()) - if date_strings: - for k,v in dct.items(): - if isinstance(v, date): - dct[k] = '{:04d}{:02d}{:02d}'.format(v.year, v.month, v.day) - return dct - - def __repr__(self): - return 'Record #{}: {}'.format(self.__oid, list(self)) - - def __dir__(self): - """ - Helps to show the field names in an interactive environment like IPython. 
- See: http://ipython.readthedocs.io/en/stable/config/integrating.html - - :return: List of method names and fields - """ - default = list(dir(type(self))) # default list methods and attributes of this class - fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) - return default + fnames - -class ShapeRecord(object): - """A ShapeRecord object containing a shape along with its attributes. - Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" - def __init__(self, shape=None, record=None): - self.shape = shape - self.record = record - - @property - def __geo_interface__(self): - return {'type': 'Feature', - 'properties': self.record.as_dict(date_strings=True), - 'geometry': None if self.shape.shapeType == NULL else self.shape.__geo_interface__} - -class Shapes(list): - """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, this also provides the GeoJSON __geo_interface__ - to return a GeometryCollection dictionary.""" - - def __repr__(self): - return 'Shapes: {}'.format(list(self)) - - @property - def __geo_interface__(self): - # Note: currently this will fail if any of the shapes are null-geometries - # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords - collection = {'type': 'GeometryCollection', - 'geometries': [shape.__geo_interface__ for shape in self]} - return collection - -class ShapeRecords(list): - """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. 
- In addition to the list interface, this also provides the GeoJSON __geo_interface__ - to return a FeatureCollection dictionary.""" - - def __repr__(self): - return 'ShapeRecords: {}'.format(list(self)) - - @property - def __geo_interface__(self): - collection = {'type': 'FeatureCollection', - 'features': [shaperec.__geo_interface__ for shaperec in self]} - return collection - -class ShapefileException(Exception): - """An exception to handle shapefile specific problems.""" - pass - -# def warn_geojson_collection(shapes): -# # collect information about any potential errors with the GeoJSON -# errors = {} -# for i,shape in enumerate(shapes): -# shape_errors = shape._errors -# if shape_errors: -# for error in shape_errors.keys(): -# errors[error] = errors[error] + [i] if error in errors else [] - -# # warn if any errors were found -# if errors: -# messages = ['Summary of possibles issues encountered during shapefile to GeoJSON conversion:'] - -# # polygon orphan holes -# orphans = errors.get('polygon_orphaned_holes', None) -# if orphans: -# msg = 'GeoJSON format requires that all interior holes be contained by an exterior ring, \ -# but the Shapefile contained {} records of polygons where some of its interior holes were \ -# orphaned (not contained by any other rings). The rings were still included but were \ -# encoded as GeoJSON exterior rings instead of holes. Shape ids: {}'.format(len(orphans), orphans) -# messages.append(msg) - -# # polygon only holes/wrong orientation -# only_holes = errors.get('polygon_only_holes', None) -# if only_holes: -# msg = 'GeoJSON format requires that polygons contain at least one exterior ring, but \ -# the Shapefile contained {} records of polygons where all of its component rings were stored as interior \ -# holes. The rings were still included but were encoded as GeoJSON exterior rings instead of holes. 
\ -# Shape ids: {}'.format(len(only_holes), only_holes) -# messages.append(msg) - -# if len(messages) > 1: -# # more than just the "Summary of..." header -# msg = '\n'.join(messages) -# logger.warning(msg) - -class Reader(object): - """Reads the three files of a shapefile as a unit or - separately. If one of the three files (.shp, .shx, - .dbf) is missing no exception is thrown until you try - to call a method that depends on that particular file. - The .shx index file is used if available for efficiency - but is not required to read the geometry from the .shp - file. The "shapefile" argument in the constructor is the - name of the file you want to open, and can be the path - to a shapefile on a local filesystem, inside a zipfile, - or a url. - - You can instantiate a Reader without specifying a shapefile - and then specify one later with the load() method. - - Only the shapefile headers are read upon loading. Content - within each file is only accessed when required and as - efficiently as possible. Shapefiles are usually not large - but they can be. 
- """ - def __init__(self, *args, **kwargs): - self.shp = None - self.shx = None - self.dbf = None - self._files_to_close = [] - self.shapeName = "Not specified" - self._offsets = [] - self.shpLength = None - self.numRecords = None - self.numShapes = None - self.fields = [] - self.__dbfHdrLength = 0 - self.__fieldLookup = {} - self.encoding = kwargs.pop('encoding', 'utf-8') - self.encodingErrors = kwargs.pop('encodingErrors', 'strict') - # See if a shapefile name was passed as the first argument - if len(args) > 0: - path = pathlike_obj(args[0]) - if is_string(path): - - if '.zip' in path: - # Shapefile is inside a zipfile - if path.count('.zip') > 1: - # Multiple nested zipfiles - raise ShapefileException('Reading from multiple nested zipfiles is not supported: %s' % path) - # Split into zipfile and shapefile paths - if path.endswith('.zip'): - zpath = path - shapefile = None - else: - zpath = path[:path.find('.zip')+4] - shapefile = path[path.find('.zip')+4+1:] - # Create a zip file handle - if zpath.startswith('http'): - # Zipfile is from a url - # Download to a temporary url and treat as normal zipfile - req = Request(zpath, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) - resp = urlopen(req) - # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected - zipfileobj = tempfile.NamedTemporaryFile(mode='w+b', suffix='.zip', delete=True) - zipfileobj.write(resp.read()) - zipfileobj.seek(0) - else: - # Zipfile is from a file - zipfileobj = open(zpath, mode='rb') - # Open the zipfile archive - with zipfile.ZipFile(zipfileobj, 'r') as archive: - if not shapefile: - # Only the zipfile path is given - # Inspect zipfile contents to find the full shapefile path - shapefiles = [name - for name in archive.namelist() - if name.endswith('.shp')] - # The zipfile must contain exactly one shapefile - if len(shapefiles) == 0: - raise 
ShapefileException('Zipfile does not contain any shapefiles') - elif len(shapefiles) == 1: - shapefile = shapefiles[0] - else: - raise ShapefileException('Zipfile contains more than one shapefile: %s. Please specify the full \ - path to the shapefile you would like to open.' % shapefiles ) - # Try to extract file-like objects from zipfile - shapefile = os.path.splitext(shapefile)[0] # root shapefile name - for ext in ['shp','shx','dbf']: - try: - member = archive.open(shapefile+'.'+ext) - # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) - fileobj.write(member.read()) - fileobj.seek(0) - setattr(self, ext, fileobj) - self._files_to_close.append(fileobj) - except: - pass - # Close and delete the temporary zipfile - try: zipfileobj.close() - except: pass - # Try to load shapefile - if (self.shp or self.dbf): - # Load and exit early - self.load() - return - else: - raise ShapefileException("No shp or dbf file found in zipfile: %s" % path) - - elif path.startswith('http'): - # Shapefile is from a url - # Download each file to temporary path and treat as normal shapefile path - urlinfo = urlparse(path) - urlpath = urlinfo[2] - urlpath,_ = os.path.splitext(urlpath) - shapefile = os.path.basename(urlpath) - for ext in ['shp','shx','dbf']: - try: - _urlinfo = list(urlinfo) - _urlinfo[2] = urlpath + '.' 
+ ext - _path = urlunparse(_urlinfo) - req = Request(_path, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) - resp = urlopen(req) - # write url data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) - fileobj.write(resp.read()) - fileobj.seek(0) - setattr(self, ext, fileobj) - self._files_to_close.append(fileobj) - except HTTPError: - pass - if (self.shp or self.dbf): - # Load and exit early - self.load() - return - else: - raise ShapefileException("No shp or dbf file found at url: %s" % path) - - else: - # Local file path to a shapefile - # Load and exit early - self.load(path) - return - - # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) - if "shp" in kwargs.keys(): - if hasattr(kwargs["shp"], "read"): - self.shp = kwargs["shp"] - # Copy if required - try: - self.shp.seek(0) - except (NameError, io.UnsupportedOperation): - self.shp = io.BytesIO(self.shp.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shp"]) - self.load_shp(baseName) - - if "shx" in kwargs.keys(): - if hasattr(kwargs["shx"], "read"): - self.shx = kwargs["shx"] - # Copy if required - try: - self.shx.seek(0) - except (NameError, io.UnsupportedOperation): - self.shx = io.BytesIO(self.shx.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shx"]) - self.load_shx(baseName) - - if "dbf" in kwargs.keys(): - if hasattr(kwargs["dbf"], "read"): - self.dbf = kwargs["dbf"] - # Copy if required - try: - self.dbf.seek(0) - except (NameError, io.UnsupportedOperation): - self.dbf = io.BytesIO(self.dbf.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["dbf"]) - self.load_dbf(baseName) - - # Load the files - if self.shp or self.dbf: - self.load() - - def __str__(self): - """ - Use some general info on the shapefile as __str__ - """ - info = ['shapefile Reader'] - if self.shp: - 
info.append(" {} shapes (type '{}')".format( - len(self), SHAPETYPE_LOOKUP[self.shapeType])) - if self.dbf: - info.append(' {} records ({} fields)'.format( - len(self), len(self.fields))) - return '\n'.join(info) - - def __enter__(self): - """ - Enter phase of context manager. - """ - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """ - Exit phase of context manager, close opened files. - """ - self.close() - - def __len__(self): - """Returns the number of shapes/records in the shapefile.""" - if self.dbf: - # Preferably use dbf record count - if self.numRecords is None: - self.__dbfHeader() - - return self.numRecords - - elif self.shp: - # Otherwise use shape count - if self.shx: - if self.numShapes is None: - self.__shxHeader() - - return self.numShapes - - else: - # Index file not available, iterate all shapes to get total count - if self.numShapes is None: - # Determine length of shp file - shp = self.shp - checkpoint = shp.tell() - shp.seek(0,2) - shpLength = shp.tell() - shp.seek(100) - # Do a fast shape iteration until end of file. 
- unpack = Struct('>2i').unpack - offsets = [] - pos = shp.tell() - while pos < shpLength: - offsets.append(pos) - # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) - # Jump to next shape position - pos += 8 + (2 * recLength) - shp.seek(pos) - # Set numShapes and offset indices - self.numShapes = len(offsets) - self._offsets = offsets - # Return to previous file position - shp.seek(checkpoint) - - return self.numShapes - - else: - # No file loaded yet, treat as 'empty' shapefile - return 0 - - def __iter__(self): - """Iterates through the shapes/records in the shapefile.""" - for shaperec in self.iterShapeRecords(): - yield shaperec - - @property - def __geo_interface__(self): - shaperecords = self.shapeRecords() - fcollection = shaperecords.__geo_interface__ - fcollection['bbox'] = list(self.bbox) - return fcollection - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def load(self, shapefile=None): - """Opens a shapefile from a filename or file-like - object. Normally this method would be called by the - constructor with the file name as an argument.""" - if shapefile: - (shapeName, ext) = os.path.splitext(shapefile) - self.shapeName = shapeName - self.load_shp(shapeName) - self.load_shx(shapeName) - self.load_dbf(shapeName) - if not (self.shp or self.dbf): - raise ShapefileException("Unable to open %s.dbf or %s.shp." 
% (shapeName, shapeName)) - if self.shp: - self.__shpHeader() - if self.dbf: - self.__dbfHeader() - if self.shx: - self.__shxHeader() - - def load_shp(self, shapefile_name): - """ - Attempts to load file with .shp extension as both lower and upper case - """ - shp_ext = 'shp' - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") - self._files_to_close.append(self.shp) - except IOError: - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext.upper()), "rb") - self._files_to_close.append(self.shp) - except IOError: - pass - - def load_shx(self, shapefile_name): - """ - Attempts to load file with .shx extension as both lower and upper case - """ - shx_ext = 'shx' - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") - self._files_to_close.append(self.shx) - except IOError: - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext.upper()), "rb") - self._files_to_close.append(self.shx) - except IOError: - pass - - def load_dbf(self, shapefile_name): - """ - Attempts to load file with .dbf extension as both lower and upper case - """ - dbf_ext = 'dbf' - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") - self._files_to_close.append(self.dbf) - except IOError: - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext.upper()), "rb") - self._files_to_close.append(self.dbf) - except IOError: - pass - - def __del__(self): - self.close() - - def close(self): - # Close any files that the reader opened (but not those given by user) - for attribute in self._files_to_close: - if hasattr(attribute, 'close'): - try: - attribute.close() - except IOError: - pass - self._files_to_close = [] - - def __getFileObj(self, f): - """Checks to see if the requested shapefile file object is - available. 
If not a ShapefileException is raised.""" - if not f: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.") - if self.shp and self.shpLength is None: - self.load() - if self.dbf and len(self.fields) == 0: - self.load() - return f - - def __restrictIndex(self, i): - """Provides list-like handling of a record index with a clearer - error message if the index is out of bounds.""" - if self.numRecords: - rmax = self.numRecords - 1 - if abs(i) > rmax: - raise IndexError("Shape or Record index out of range.") - if i < 0: i = range(self.numRecords)[i] - return i - - def __shpHeader(self): - """Reads the header information from a .shp file.""" - if not self.shp: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shp file found") - shp = self.shp - # File length (16-bit word * 2 = bytes) - shp.seek(24) - self.shpLength = unpack(">i", shp.read(4))[0] * 2 - # Shape type - shp.seek(32) - self.shapeType= unpack(" NODATA: - self.mbox.append(m) - else: - self.mbox.append(None) - - def __shape(self, oid=None, bbox=None): - """Returns the header info and geometry for a single shape.""" - f = self.__getFileObj(self.shp) - record = Shape(oid=oid) - nParts = nPoints = zmin = zmax = mmin = mmax = None - (recNum, recLength) = unpack(">2i", f.read(8)) - # Determine the start of the next record - next = f.tell() + (2 * recLength) - shapeType = unpack("= 16: - (mmin, mmax) = unpack("<2d", f.read(16)) - # Measure values less than -10e38 are nodata values according to the spec - if next - f.tell() >= nPoints * 8: - record.m = [] - for m in _Array('d', unpack("<%sd" % nPoints, f.read(nPoints * 8))): - if m > NODATA: - record.m.append(m) - else: - record.m.append(None) - else: - record.m = [None for _ in range(nPoints)] - # Read a single point - if shapeType in (1,11,21): - record.points = [_Array('d', unpack("<2d", f.read(16)))] - # Read a single Z value - if shapeType == 11: - record.z = list(unpack("= 8: - (m,) = 
unpack(" NODATA: - record.m = [m] - else: - record.m = [None] - # Seek to the end of this record as defined by the record header because - # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. - f.seek(next) - return record - - def __shxHeader(self): - """Reads the header information from a .shx file.""" - shx = self.shx - if not shx: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found") - # File length (16-bit word * 2 = bytes) - header length - shx.seek(24) - shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100 - self.numShapes = shxRecordLength // 8 - - def __shxOffsets(self): - '''Reads the shape offset positions from a .shx file''' - shx = self.shx - if not shx: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found") - # Jump to the first record. - shx.seek(100) - # Each index record consists of two nrs, we only want the first one - shxRecords = _Array('i', shx.read(2 * self.numShapes * 4) ) - if sys.byteorder != 'big': - shxRecords.byteswap() - self._offsets = [2 * el for el in shxRecords[::2]] - - def __shapeIndex(self, i=None): - """Returns the offset in a .shp file for a shape based on information - in the .shx index file.""" - shx = self.shx - # Return None if no shx or no index requested - if not shx or i == None: - return None - # At this point, we know the shx file exists - if not self._offsets: - self.__shxOffsets() - return self._offsets[i] - - def shape(self, i=0, bbox=None): - """Returns a shape object for a shape in the geometry - record file. - If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. - """ - shp = self.__getFileObj(self.shp) - i = self.__restrictIndex(i) - offset = self.__shapeIndex(i) - if not offset: - # Shx index not available. 
- # Determine length of shp file - shp.seek(0,2) - shpLength = shp.tell() - shp.seek(100) - # Do a fast shape iteration until the requested index or end of file. - unpack = Struct('>2i').unpack - _i = 0 - offset = shp.tell() - while offset < shpLength: - if _i == i: - # Reached the requested index, exit loop with the offset value - break - # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) - # Jump to next shape position - offset += 8 + (2 * recLength) - shp.seek(offset) - _i += 1 - # If the index was not found, it likely means the .shp file is incomplete - if _i != i: - raise ShapefileException('Shape index {} is out of bounds; the .shp file only contains {} shapes'.format(i, _i)) - - # Seek to the offset and read the shape - shp.seek(offset) - return self.__shape(oid=i, bbox=bbox) - - def shapes(self, bbox=None): - """Returns all shapes in a shapefile. - To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - shapes = Shapes() - shapes.extend(self.iterShapes(bbox=bbox)) - return shapes - - def iterShapes(self, bbox=None): - """Returns a generator of shapes in a shapefile. Useful - for handling large shapefiles. - To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - shp = self.__getFileObj(self.shp) - # Found shapefiles which report incorrect - # shp file length in the header. Can't trust - # that so we seek to the end of the file - # and figure it out. - shp.seek(0,2) - shpLength = shp.tell() - shp.seek(100) - - if self.numShapes: - # Iterate exactly the number of shapes from shx header - for i in xrange(self.numShapes): - # MAYBE: check if more left of file or exit early? 
- shape = self.__shape(oid=i, bbox=bbox) - if shape: - yield shape - else: - # No shx file, unknown nr of shapes - # Instead iterate until reach end of file - # Collect the offset indices during iteration - i = 0 - offsets = [] - pos = shp.tell() - while pos < shpLength: - offsets.append(pos) - shape = self.__shape(oid=i, bbox=bbox) - pos = shp.tell() - if shape: - yield shape - i += 1 - # Entire shp file consumed - # Update the number of shapes and list of offsets - assert i == len(offsets) - self.numShapes = i - self._offsets = offsets - - def __dbfHeader(self): - """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" - if not self.dbf: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no dbf file found)") - dbf = self.dbf - # read relevant header parts - dbf.seek(0) - self.numRecords, self.__dbfHdrLength, self.__recordLength = \ - unpack(" 0: - px, py = list(zip(*s.points))[:2] - x.extend(px) - y.extend(py) - else: - # this should not happen. - # any shape that is not null should have at least one point, and only those should be sent here. - # could also mean that earlier code failed to add points to a non-null shape. - raise Exception("Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." 
% s.shapeType) - bbox = [min(x), min(y), max(x), max(y)] - # update global - if self._bbox: - # compare with existing - self._bbox = [min(bbox[0],self._bbox[0]), min(bbox[1],self._bbox[1]), max(bbox[2],self._bbox[2]), max(bbox[3],self._bbox[3])] - else: - # first time bbox is being set - self._bbox = bbox - return bbox - - def __zbox(self, s): - z = [] - for p in s.points: - try: - z.append(p[2]) - except IndexError: - # point did not have z value - # setting it to 0 is probably ok, since it means all are on the same elevation - z.append(0) - zbox = [min(z), max(z)] - # update global - if self._zbox: - # compare with existing - self._zbox = [min(zbox[0],self._zbox[0]), max(zbox[1],self._zbox[1])] - else: - # first time zbox is being set - self._zbox = zbox - return zbox - - def __mbox(self, s): - mpos = 3 if s.shapeType in (11,13,15,18,31) else 2 - m = [] - for p in s.points: - try: - if p[mpos] is not None: - # mbox should only be calculated on valid m values - m.append(p[mpos]) - except IndexError: - # point did not have m value so is missing - # mbox should only be calculated on valid m values - pass - if not m: - # only if none of the shapes had m values, should mbox be set to missing m values - m.append(NODATA) - mbox = [min(m), max(m)] - # update global - if self._mbox: - # compare with existing - self._mbox = [min(mbox[0],self._mbox[0]), max(mbox[1],self._mbox[1])] - else: - # first time mbox is being set - self._mbox = mbox - return mbox - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def bbox(self): - """Returns the current bounding box for the shapefile which is - the lower-left and upper-right corners. 
It does not contain the - elevation or measure extremes.""" - return self._bbox - - def zbox(self): - """Returns the current z extremes for the shapefile.""" - return self._zbox - - def mbox(self): - """Returns the current m extremes for the shapefile.""" - return self._mbox - - def __shapefileHeader(self, fileObj, headerType='shp'): - """Writes the specified header type to the specified file-like object. - Several of the shapefile formats are so similar that a single generic - method to read or write them is warranted.""" - f = self.__getFileObj(fileObj) - f.seek(0) - # File code, Unused bytes - f.write(pack(">6i", 9994,0,0,0,0,0)) - # File length (Bytes / 2 = 16-bit words) - if headerType == 'shp': - f.write(pack(">i", self.__shpFileLength())) - elif headerType == 'shx': - f.write(pack('>i', ((100 + (self.shpNum * 8)) // 2))) - # Version, Shape type - if self.shapeType is None: - self.shapeType = NULL - f.write(pack("<2i", 1000, self.shapeType)) - # The shapefile's bounding box (lower left, upper right) - if self.shapeType != 0: - try: - bbox = self.bbox() - if bbox is None: - # The bbox is initialized with None, so this would mean the shapefile contains no valid geometries. - # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. - # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. - # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. - bbox = [0,0,0,0] - f.write(pack("<4d", *bbox)) - except error: - raise ShapefileException("Failed to write shapefile bounding box. 
Floats required.") - else: - f.write(pack("<4d", 0,0,0,0)) - # Elevation - if self.shapeType in (11,13,15,18): - # Z values are present in Z type - zbox = self.zbox() - if zbox is None: - # means we have empty shapefile/only null geoms (see commentary on bbox above) - zbox = [0,0] - else: - # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = [0,0] - # Measure - if self.shapeType in (11,13,15,18,21,23,25,28,31): - # M values are present in M or Z type - mbox = self.mbox() - if mbox is None: - # means we have empty shapefile/only null geoms (see commentary on bbox above) - mbox = [0,0] - else: - # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = [0,0] - # Try writing - try: - f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) - except error: - raise ShapefileException("Failed to write shapefile elevation and measure values. Floats required.") - - def __dbfHeader(self): - """Writes the dbf header and field descriptors.""" - f = self.__getFileObj(self.dbf) - f.seek(0) - version = 3 - year, month, day = time.localtime()[:3] - year -= 1900 - # Get all fields, ignoring DeletionFlag if specified - fields = [field for field in self.fields if field[0] != 'DeletionFlag'] - # Ensure has at least one field - if not fields: - raise ShapefileException("Shapefile dbf file must contain at least one field.") - numRecs = self.recNum - numFields = len(fields) - headerLength = numFields * 32 + 33 - if headerLength >= 65535: - raise ShapefileException( - "Shapefile dbf header length exceeds maximum length.") - recordLength = sum([int(field[2]) for field in fields]) + 1 - header = pack('2i", self.shpNum, 0)) - start = f.tell() - # Shape Type - if self.shapeType is None and s.shapeType != NULL: - self.shapeType = s.shapeType - if s.shapeType != NULL and s.shapeType != self.shapeType: - raise Exception("The shape's type (%s) must match the type of the shapefile (%s)." 
% (s.shapeType, self.shapeType)) - f.write(pack(" 2 else 0)) for p in s.points] - except error: - raise ShapefileException("Failed to write elevation values for record %s. Expected floats." % self.shpNum) - # Write m extremes and values - # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA - # Note: missing m values are autoset to NODATA. - if s.shapeType in (13,15,18,23,25,28,31): - try: - f.write(pack("<2d", *self.__mbox(s))) - except error: - raise ShapefileException("Failed to write measure extremes for record %s. Expected floats" % self.shpNum) - try: - if hasattr(s,"m"): - # if m values are stored in attribute - f.write(pack("<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m])) - else: - # if m values are stored as 3rd/4th dimension - # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) - mpos = 3 if s.shapeType in (13,15,18,31) else 2 - [f.write(pack(" mpos and p[mpos] is not None else NODATA)) for p in s.points] - except error: - raise ShapefileException("Failed to write measure values for record %s. Expected floats" % self.shpNum) - # Write a single point - if s.shapeType in (1,11,21): - try: - f.write(pack("<2d", s.points[0][0], s.points[0][1])) - except error: - raise ShapefileException("Failed to write point for record %s. Expected floats." % self.shpNum) - # Write a single Z value - # Note: missing z values are autoset to 0, but not sure if this is ideal. 
- if s.shapeType == 11: - # update the global z box - self.__zbox(s) - # then write value - if hasattr(s, "z"): - # if z values are stored in attribute - try: - if not s.z: - s.z = (0,) - f.write(pack("i", length)) - f.seek(finish) - return offset,length - - def __shxRecord(self, offset, length): - """Writes the shx records.""" - f = self.__getFileObj(self.shx) - try: - f.write(pack(">i", offset // 2)) - except error: - raise ShapefileException('The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones.') - f.write(pack(">i", length)) - - def record(self, *recordList, **recordDict): - """Creates a dbf attribute record. You can submit either a sequence of - field values or keyword arguments of field names and values. Before - adding records you must add fields for the record values using the - field() method. If the record values exceed the number of fields the - extra ones won't be added. In the case of using keyword arguments to specify - field/value pairs only fields matching the already registered fields - will be added.""" - # Balance if already not balanced - if self.autoBalance and self.recNum > self.shpNum: - self.balance() - - fieldCount = sum((1 for field in self.fields if field[0] != 'DeletionFlag')) - if recordList: - record = list(recordList) - while len(record) < fieldCount: - record.append("") - elif recordDict: - record = [] - for field in self.fields: - if field[0] == 'DeletionFlag': - continue # ignore deletionflag field in case it was specified - if field[0] in recordDict: - val = recordDict[field[0]] - if val is None: - record.append("") - else: - record.append(val) - else: - record.append("") # need empty value for missing dict entries - else: - # Blank fields for empty record - record = ["" for _ in range(fieldCount)] - self.__dbfRecord(record) - - def __dbfRecord(self, record): - """Writes the dbf records.""" - f = self.__getFileObj(self.dbf) - if self.recNum == 0: - 
# first records, so all fields should be set - # allowing us to write the dbf header - # cannot change the fields after this point - self.__dbfHeader() - # first byte of the record is deletion flag, always disabled - f.write(b' ') - # begin - self.recNum += 1 - fields = (field for field in self.fields if field[0] != 'DeletionFlag') # ignore deletionflag field in case it was specified - for (fieldName, fieldType, size, deci), value in zip(fields, record): - # write - fieldType = fieldType.upper() - size = int(size) - if fieldType in ("N","F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - value = b"*"*size # QGIS NULL - elif not deci: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. - value = int(float(value)) - value = format(value, "d")[:size].rjust(size) # caps the size if exceeds the field size - else: - value = float(value) - value = format(value, ".%sf"%deci)[:size].rjust(size) # caps the size if exceeds the field size - elif fieldType == "D": - # date: 8 bytes - date stored as a string in the format YYYYMMDD. - if isinstance(value, date): - value = '{:04d}{:02d}{:02d}'.format(value.year, value.month, value.day) - elif isinstance(value, list) and len(value) == 3: - value = '{:04d}{:02d}{:02d}'.format(*value) - elif value in MISSING: - value = b'0' * 8 # QGIS NULL for date type - elif is_string(value) and len(value) == 8: - pass # value is already a date string - else: - raise ShapefileException("Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value.") - elif fieldType == 'L': - # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
- if value in MISSING: - value = b' ' # missing is set to space - elif value in [True,1]: - value = b'T' - elif value in [False,0]: - value = b'F' - else: - value = b' ' # unknown is set to space - else: - # anything else is forced to string, truncated to the length of the field - value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - if not isinstance(value, bytes): - # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - value = b(value, 'ascii', self.encodingErrors) # should be default ascii encoding - if len(value) != size: - raise ShapefileException( - "Shapefile Writer unable to pack incorrect sized value" - " (size %d) into field '%s' (size %d)." % (len(value), fieldName, size)) - f.write(value) - - def balance(self): - """Adds corresponding empty attributes or null geometry records depending - on which type of record was created to make sure all three files - are in synch.""" - while self.recNum > self.shpNum: - self.null() - while self.recNum < self.shpNum: - self.record() - - - def null(self): - """Creates a null shape.""" - self.shape(Shape(NULL)) - - - def point(self, x, y): - """Creates a POINT shape.""" - shapeType = POINT - pointShape = Shape(shapeType) - pointShape.points.append([x, y]) - self.shape(pointShape) - - def pointm(self, x, y, m=None): - """Creates a POINTM shape. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTM - pointShape = Shape(shapeType) - pointShape.points.append([x, y, m]) - self.shape(pointShape) - - def pointz(self, x, y, z=0, m=None): - """Creates a POINTZ shape. - If the z (elevation) value is not set, it defaults to 0. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTZ - pointShape = Shape(shapeType) - pointShape.points.append([x, y, z, m]) - self.shape(pointShape) - - - def multipoint(self, points): - """Creates a MULTIPOINT shape. 
- Points is a list of xy values.""" - shapeType = MULTIPOINT - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointm(self, points): - """Creates a MULTIPOINTM shape. - Points is a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTM - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointz(self, points): - """Creates a MULTIPOINTZ shape. - Points is a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTZ - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - - def line(self, lines): - """Creates a POLYLINE shape. - Lines is a collection of lines, each made up of a list of xy values.""" - shapeType = POLYLINE - self._shapeparts(parts=lines, shapeType=shapeType) - - def linem(self, lines): - """Creates a POLYLINEM shape. - Lines is a collection of lines, each made up of a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEM - self._shapeparts(parts=lines, shapeType=shapeType) - - def linez(self, lines): - """Creates a POLYLINEZ shape. - Lines is a collection of lines, each made up of a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEZ - self._shapeparts(parts=lines, shapeType=shapeType) - - - def poly(self, polys): - """Creates a POLYGON shape. - Polys is a collection of polygons, each made up of a list of xy values. 
- Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction.""" - shapeType = POLYGON - self._shapeparts(parts=polys, shapeType=shapeType) - - def polym(self, polys): - """Creates a POLYGONM shape. - Polys is a collection of polygons, each made up of a list of xym values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONM - self._shapeparts(parts=polys, shapeType=shapeType) - - def polyz(self, polys): - """Creates a POLYGONZ shape. - Polys is a collection of polygons, each made up of a list of xyzm values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONZ - self._shapeparts(parts=polys, shapeType=shapeType) - - - def multipatch(self, parts, partTypes): - """Creates a MULTIPATCH shape. - Parts is a collection of 3D surface patches, each made up of a list of xyzm values. - PartTypes is a list of types that define each of the surface patches. - The types can be any of the following module constants: TRIANGLE_STRIP, - TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. - If the z (elevation) value is not included, it defaults to 0. 
- If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPATCH - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - polyShape.partTypes = partTypes - # write the shape - self.shape(polyShape) - - - def _shapeparts(self, parts, shapeType): - """Internal method for adding a shape that has multiple collections of points (parts): - lines, polygons, and multipoint shapes. - """ - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - # Make sure polygon rings (parts) are closed - if shapeType in (5,15,25,31): - for part in parts: - if part[0] != part[-1]: - part.append(part[0]) - # Add points and part indexes - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - # write the shape - self.shape(polyShape) - - def field(self, name, fieldType="C", size="50", decimal=0): - """Adds a dbf field descriptor to the shapefile.""" - if fieldType == "D": - size = "8" - decimal = 0 - elif fieldType == "L": - size = "1" - decimal = 0 - if len(self.fields) >= 2046: - raise ShapefileException( - "Shapefile Writer reached maximum number of fields: 2046.") - self.fields.append((name, fieldType, size, decimal)) - -## def saveShp(self, target): -## """Save an shp file.""" -## if not hasattr(target, "write"): -## target = os.path.splitext(target)[0] + '.shp' -## self.shp = self.__getFileObj(target) -## self.__shapefileHeader(self.shp, headerType='shp') -## self.shp.seek(100) -## self._shp.seek(0) -## chunk = True -## while chunk: -## chunk = self._shp.read(self.bufsize) 
-## self.shp.write(chunk) -## -## def saveShx(self, target): -## """Save an shx file.""" -## if not hasattr(target, "write"): -## target = os.path.splitext(target)[0] + '.shx' -## self.shx = self.__getFileObj(target) -## self.__shapefileHeader(self.shx, headerType='shx') -## self.shx.seek(100) -## self._shx.seek(0) -## chunk = True -## while chunk: -## chunk = self._shx.read(self.bufsize) -## self.shx.write(chunk) -## -## def saveDbf(self, target): -## """Save a dbf file.""" -## if not hasattr(target, "write"): -## target = os.path.splitext(target)[0] + '.dbf' -## self.dbf = self.__getFileObj(target) -## self.__dbfHeader() # writes to .dbf -## self._dbf.seek(0) -## chunk = True -## while chunk: -## chunk = self._dbf.read(self.bufsize) -## self.dbf.write(chunk) - -## def save(self, target=None, shp=None, shx=None, dbf=None): -## """Save the shapefile data to three files or -## three file-like objects. SHP and DBF files can also -## be written exclusively using saveShp, saveShx, and saveDbf respectively. -## If target is specified but not shp, shx, or dbf then the target path and -## file name are used. If no options or specified, a unique base file name -## is generated to save the files and the base file name is returned as a -## string. 
-## """ -## # Balance if already not balanced -## if shp and dbf: -## if self.autoBalance: -## self.balance() -## if self.recNum != self.shpNum: -## raise ShapefileException("When saving both the dbf and shp file, " -## "the number of records (%s) must correspond " -## "with the number of shapes (%s)" % (self.recNum, self.shpNum)) -## # Save -## if shp: -## self.saveShp(shp) -## if shx: -## self.saveShx(shx) -## if dbf: -## self.saveDbf(dbf) -## # Create a unique file name if one is not defined -## if not shp and not shx and not dbf: -## generated = False -## if not target: -## temp = tempfile.NamedTemporaryFile(prefix="shapefile_",dir=os.getcwd()) -## target = temp.name -## generated = True -## self.saveShp(target) -## self.shp.close() -## self.saveShx(target) -## self.shx.close() -## self.saveDbf(target) -## self.dbf.close() -## if generated: -## return target - -# Begin Testing -def test(**kwargs): - import doctest - doctest.NORMALIZE_WHITESPACE = 1 - verbosity = kwargs.get('verbose', 0) - if verbosity == 0: - print('Running doctests...') - - # ignore py2-3 unicode differences - import re - class Py23DocChecker(doctest.OutputChecker): - def check_output(self, want, got, optionflags): - if sys.version_info[0] == 2: - got = re.sub("u'(.*?)'", "'\\1'", got) - got = re.sub('u"(.*?)"', '"\\1"', got) - res = doctest.OutputChecker.check_output(self, want, got, optionflags) - return res - def summarize(self): - doctest.OutputChecker.summarize(True) - - # run tests - runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) - with open("README.md","rb") as fobj: - test = doctest.DocTestParser().get_doctest(string=fobj.read().decode("utf8").replace('\r\n','\n'), globs={}, name="README", filename="README.md", lineno=0) - failure_count, test_count = runner.run(test) - - # print results - if verbosity: - runner.summarize(True) - else: - if failure_count == 0: - print('All test passed successfully') - elif failure_count > 0: - runner.summarize(verbosity) - - 
return failure_count - -if __name__ == "__main__": - """ - Doctests are contained in the file 'README.md', and are tested using the built-in - testing libraries. - """ - failure_count = test() - sys.exit(failure_count) +""" +shapefile.py +Provides read and write support for ESRI Shapefiles. +authors: jlawheadgeospatialpython.com +maintainer: karim.bahgat.norwaygmail.com +Compatible with Python versions 2.7-3.x +""" + +__version__ = "2.4.0" + +import array +import io +import logging +import os +import sys +import tempfile +import time +import zipfile +from datetime import date +from struct import Struct, calcsize, error, pack, unpack + +# Create named logger +logger = logging.getLogger(__name__) + +# Module settings +VERBOSE = True + +# Test config (for the Doctest runner and test_shapefile.py) +REPLACE_REMOTE_URLS_WITH_LOCALHOST = ( + os.getenv("REPLACE_REMOTE_URLS_WITH_LOCALHOST", "").lower() == "yes" +) + +# Constants for shape types +NULL = 0 +POINT = 1 +POLYLINE = 3 +POLYGON = 5 +MULTIPOINT = 8 +POINTZ = 11 +POLYLINEZ = 13 +POLYGONZ = 15 +MULTIPOINTZ = 18 +POINTM = 21 +POLYLINEM = 23 +POLYGONM = 25 +MULTIPOINTM = 28 +MULTIPATCH = 31 + +SHAPETYPE_LOOKUP = { + 0: "NULL", + 1: "POINT", + 3: "POLYLINE", + 5: "POLYGON", + 8: "MULTIPOINT", + 11: "POINTZ", + 13: "POLYLINEZ", + 15: "POLYGONZ", + 18: "MULTIPOINTZ", + 21: "POINTM", + 23: "POLYLINEM", + 25: "POLYGONM", + 28: "MULTIPOINTM", + 31: "MULTIPATCH", +} + +TRIANGLE_STRIP = 0 +TRIANGLE_FAN = 1 +OUTER_RING = 2 +INNER_RING = 3 +FIRST_RING = 4 +RING = 5 + +PARTTYPE_LOOKUP = { + 0: "TRIANGLE_STRIP", + 1: "TRIANGLE_FAN", + 2: "OUTER_RING", + 3: "INNER_RING", + 4: "FIRST_RING", + 5: "RING", +} + + +# Python 2-3 handling + +PYTHON3 = sys.version_info[0] == 3 + +if PYTHON3: + xrange = range + izip = zip + + from urllib.error import HTTPError + from urllib.parse import urlparse, urlunparse + from urllib.request import Request, urlopen + +else: + from itertools import izip + + from urllib2 import HTTPError, Request, urlopen 
def u(v, encoding="utf-8", encodingErrors="strict"):
    """Return *v* as text (``str``); this is the Python 3 definition.

    :param v: The value to convert. ``str`` is returned unchanged, ``None``
        becomes the empty string, bytes-like values are decoded, and any
        other object is converted with ``str()``.
    :param encoding: Codec used to decode bytes-like values.
    :param encodingErrors: Error handler passed to ``decode``.
    :return: The text representation of *v*.
    """
    if isinstance(v, str):
        # Already str.
        return v
    if v is None:
        # Since we're dealing with text, interpret None as ""
        return ""
    if isinstance(v, (bytes, bytearray, memoryview)):
        # Decode bytes (and other bytes-like buffers) to str.
        return bytes(v).decode(encoding, encodingErrors)
    # Force string representation. Do NOT go through bytes(v) here: on
    # Python 3 bytes(5) is five NUL bytes and bytes(1.5) raises TypeError.
    return str(v)
if sys.version_info[0:2] >= (3, 6):

    def pathlike_obj(path):
        """Return the string form of an ``os.PathLike`` object.

        Anything that is not ``os.PathLike`` (plain strings, file-like
        objects, ...) is returned unchanged.
        """
        if isinstance(path, os.PathLike):
            return os.fsdecode(path)
        return path

else:

    def pathlike_obj(path):
        """Fallback for interpreters older than Python 3.6.

        Strings are returned unchanged, objects exposing ``__fspath__`` are
        asked for their path, and everything else is converted with
        ``str()`` on a best-effort basis.
        """
        if is_string(path):
            return path
        if hasattr(path, "__fspath__"):
            return path.__fspath__()
        try:
            return str(path)
        except Exception:
            # Best effort only: hand the object back untouched. A bare
            # ``except:`` would also swallow KeyboardInterrupt/SystemExit.
            return path
def ring_bbox(coords):
    """Calculates and returns the bounding box of a ring.

    :param coords: A non-empty sequence of coordinates. Only the first two
        values (x, y) of each coordinate are used.
    :return: The tuple (xmin, ymin, xmax, ymax).
    """
    # ignore any z or m values, same as signed_area() does; unpacking every
    # dimension into "xs, ys" raises ValueError for 3- or 4-value coordinates
    xs, ys = list(zip(*coords))[:2]
    return min(xs), min(ys), max(xs), max(ys)
def ring_sample(coords, ccw=False):
    """Return a sample point guaranteed to be within a ring, by efficiently
    finding the first centroid of a coordinate triplet whose orientation
    matches the orientation of the ring and passes the point-in-ring test.
    The orientation of the ring is assumed to be clockwise, unless ccw
    (counter-clockwise) is set to True.

    Only the x and y values of each coordinate are used, any z or m values
    are ignored.

    :param coords: The closed ring, as a sequence of coordinates.
    :param ccw: Set to True if the ring runs counter-clockwise.
    :return: The sample point as an (x, y) tuple.
    :raises Exception: If no triplet yields a point inside the ring.
    """
    triplet = []

    def itercoords():
        # iterate full closed ring
        for p in coords:
            yield p
        # finally, yield the second coordinate to the end to allow checking the last triplet
        yield coords[1]

    for p in itercoords():
        # add point to triplet (but not if duplicate)
        if p not in triplet:
            triplet.append(p)

        # new triplet, try to get sample
        if len(triplet) == 3:
            # keep only x and y, so that coordinates with z or m values
            # do not break the centroid calculation
            (x0, y0), (x1, y1), (x2, y2) = [pt[:2] for pt in triplet]
            # check that triplet does not form a straight line (not a triangle)
            is_straight_line = (y0 - y1) * (x0 - x2) == (y0 - y2) * (x0 - x1)
            if not is_straight_line:
                # get triplet orientation
                closed_triplet = triplet + [triplet[0]]
                triplet_ccw = not is_cw(closed_triplet)
                # check that triplet has the same orientation as the ring (means triangle is inside the ring)
                if ccw == triplet_ccw:
                    # get triplet centroid
                    xmean = (x0 + x1 + x2) / 3.0
                    ymean = (y0 + y1 + y2) / 3.0
                    # check that triplet centroid is truly inside the ring
                    if ring_contains_point(coords, (xmean, ymean)):
                        return xmean, ymean

            # failed to get sample point from this triplet
            # remove oldest triplet coord to allow iterating to next triplet
            triplet.pop(0)

    # every triplet was tried without success
    raise Exception("Unexpected error: Unable to find a ring sample point.")
+ """ + # first iterate rings and classify as exterior or hole + exteriors = [] + holes = [] + for ring in rings: + # shapefile format defines a polygon as a sequence of rings + # where exterior rings are clockwise, and holes counterclockwise + if is_cw(ring): + # ring is exterior + exteriors.append(ring) + else: + # ring is a hole + holes.append(ring) + + # if only one exterior, then all holes belong to that exterior + if len(exteriors) == 1: + # exit early + poly = [exteriors[0]] + holes + polys = [poly] + return polys + + # multiple exteriors, ie multi-polygon, have to group holes with correct exterior + # shapefile format does not specify which holes belong to which exteriors + # so have to do efficient multi-stage checking of hole-to-exterior containment + elif len(exteriors) > 1: + # exit early if no holes + if not holes: + polys = [] + for ext in exteriors: + poly = [ext] + polys.append(poly) + return polys + + # first determine each hole's candidate exteriors based on simple bbox contains test + hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) + exterior_bboxes = [ring_bbox(ring) for ring in exteriors] + for hole_i in hole_exteriors.keys(): + hole_bbox = ring_bbox(holes[hole_i]) + for ext_i, ext_bbox in enumerate(exterior_bboxes): + if bbox_contains(ext_bbox, hole_bbox): + hole_exteriors[hole_i].append(ext_i) + + # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test + for hole_i, exterior_candidates in hole_exteriors.items(): + if len(exterior_candidates) > 1: + # get hole sample point + ccw = not is_cw(holes[hole_i]) + hole_sample = ring_sample(holes[hole_i], ccw=ccw) + # collect new exterior candidates + new_exterior_candidates = [] + for ext_i in exterior_candidates: + # check that hole sample point is inside exterior + hole_in_exterior = ring_contains_point( + exteriors[ext_i], hole_sample + ) + if hole_in_exterior: + new_exterior_candidates.append(ext_i) + + # set new exterior candidates 
+ hole_exteriors[hole_i] = new_exterior_candidates + + # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole + for hole_i, exterior_candidates in hole_exteriors.items(): + if len(exterior_candidates) > 1: + # exterior candidate with the smallest area is the hole's most immediate parent + ext_i = sorted( + exterior_candidates, + key=lambda x: abs(signed_area(exteriors[x], fast=True)), + )[0] + hole_exteriors[hole_i] = [ext_i] + + # separate out holes that are orphaned (not contained by any exterior) + orphan_holes = [] + for hole_i, exterior_candidates in list(hole_exteriors.items()): + if not exterior_candidates: + orphan_holes.append(hole_i) + del hole_exteriors[hole_i] + continue + + # each hole should now only belong to one exterior, group into exterior-holes polygons + polys = [] + for ext_i, ext in enumerate(exteriors): + poly = [ext] + # find relevant holes + poly_holes = [] + for hole_i, exterior_candidates in list(hole_exteriors.items()): + # hole is relevant if previously matched with this exterior + if exterior_candidates[0] == ext_i: + poly_holes.append(holes[hole_i]) + poly += poly_holes + polys.append(poly) + + # add orphan holes as exteriors + for hole_i in orphan_holes: + ext = holes[hole_i] + # add as single exterior without any holes + poly = [ext] + polys.append(poly) + + if orphan_holes and return_errors is not None: + return_errors["polygon_orphaned_holes"] = len(orphan_holes) + + return polys + + # no exteriors, be nice and assume due to incorrect winding order + else: + if return_errors is not None: + return_errors["polygon_only_holes"] = len(holes) + exteriors = holes + # add as single exterior without any holes + polys = [[ext] for ext in exteriors] + return polys + + +class Shape(object): + def __init__( + self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None + ): + """Stores the geometry of the different shape types + specified in the Shapefile spec. 
Shape types are + usually point, polyline, or polygons. Every shape type + except the "Null" type contains points at some level for + example vertices in a polygon. If a shape type has + multiple shapes containing points within a single + geometry record then those shapes are called parts. Parts + are designated by their starting index in geometry record's + list of shapes. For MultiPatch geometry, partTypes designates + the patch type of each of the parts. + """ + self.shapeType = shapeType + self.points = points or [] + self.parts = parts or [] + if partTypes: + self.partTypes = partTypes + + # and a dict to silently record any errors encountered + self._errors = {} + + # add oid + if oid is not None: + self.__oid = oid + else: + self.__oid = -1 + + @property + def __geo_interface__(self): + if self.shapeType in [POINT, POINTM, POINTZ]: + # point + if len(self.points) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "Point", "coordinates": tuple()} + else: + return {"type": "Point", "coordinates": tuple(self.points[0])} + elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: + if len(self.points) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "MultiPoint", "coordinates": []} + else: + # multipoint + return { + "type": "MultiPoint", + "coordinates": [tuple(p) for p in self.points], + } + elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. 
is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "LineString", "coordinates": []} + elif len(self.parts) == 1: + # linestring + return { + "type": "LineString", + "coordinates": [tuple(p) for p in self.points], + } + else: + # multilinestring + ps = None + coordinates = [] + for part in self.parts: + if ps is None: + ps = part + continue + else: + coordinates.append([tuple(p) for p in self.points[ps:part]]) + ps = part + else: + coordinates.append([tuple(p) for p in self.points[part:]]) + return {"type": "MultiLineString", "coordinates": coordinates} + elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {"type": "Polygon", "coordinates": []} + else: + # get all polygon rings + rings = [] + for i in xrange(len(self.parts)): + # get indexes of start and end points of the ring + start = self.parts[i] + try: + end = self.parts[i + 1] + except IndexError: + end = len(self.points) + + # extract the points that make up the ring + ring = [tuple(p) for p in self.points[start:end]] + rings.append(ring) + + # organize rings into list of polygons, where each polygon is defined as list of rings. + # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). 
+ polys = organize_polygon_rings(rings, self._errors) + + # if VERBOSE is True, issue detailed warning about any shape errors + # encountered during the Shapefile to GeoJSON conversion + if VERBOSE and self._errors: + header = "Possible issue encountered when converting Shape #{} to GeoJSON: ".format( + self.oid + ) + orphans = self._errors.get("polygon_orphaned_holes", None) + if orphans: + msg = ( + header + + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ +but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ +orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ +encoded as GeoJSON exterior rings instead of holes." + ) + logger.warning(msg) + only_holes = self._errors.get("polygon_only_holes", None) + if only_holes: + msg = ( + header + + "Shapefile format requires that polygons contain at least one exterior ring, \ +but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ +still included but were encoded as GeoJSON exterior rings instead of holes." + ) + logger.warning(msg) + + # return as geojson + if len(polys) == 1: + return {"type": "Polygon", "coordinates": polys[0]} + else: + return {"type": "MultiPolygon", "coordinates": polys} + + else: + raise Exception( + 'Shape type "%s" cannot be represented as GeoJSON.' 
+ % SHAPETYPE_LOOKUP[self.shapeType] + ) + + @staticmethod + def _from_geojson(geoj): + # create empty shape + shape = Shape() + # set shapeType + geojType = geoj["type"] if geoj else "Null" + if geojType == "Null": + shapeType = NULL + elif geojType == "Point": + shapeType = POINT + elif geojType == "LineString": + shapeType = POLYLINE + elif geojType == "Polygon": + shapeType = POLYGON + elif geojType == "MultiPoint": + shapeType = MULTIPOINT + elif geojType == "MultiLineString": + shapeType = POLYLINE + elif geojType == "MultiPolygon": + shapeType = POLYGON + else: + raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) + shape.shapeType = shapeType + + # set points and parts + if geojType == "Point": + shape.points = [geoj["coordinates"]] + shape.parts = [0] + elif geojType in ("MultiPoint", "LineString"): + shape.points = geoj["coordinates"] + shape.parts = [0] + elif geojType in ("Polygon"): + points = [] + parts = [] + index = 0 + for i, ext_or_hole in enumerate(geoj["coordinates"]): + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. 
+ if i == 0 and not is_cw(ext_or_hole): + # flip exterior direction + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): + # flip hole direction + ext_or_hole = rewind(ext_or_hole) + points.extend(ext_or_hole) + parts.append(index) + index += len(ext_or_hole) + shape.points = points + shape.parts = parts + elif geojType in ("MultiLineString"): + points = [] + parts = [] + index = 0 + for linestring in geoj["coordinates"]: + points.extend(linestring) + parts.append(index) + index += len(linestring) + shape.points = points + shape.parts = parts + elif geojType in ("MultiPolygon"): + points = [] + parts = [] + index = 0 + for polygon in geoj["coordinates"]: + for i, ext_or_hole in enumerate(polygon): + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. + if i == 0 and not is_cw(ext_or_hole): + # flip exterior direction + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): + # flip hole direction + ext_or_hole = rewind(ext_or_hole) + points.extend(ext_or_hole) + parts.append(index) + index += len(ext_or_hole) + shape.points = points + shape.parts = parts + return shape + + @property + def oid(self): + """The index position of the shape in the original shapefile""" + return self.__oid + + @property + def shapeTypeName(self): + return SHAPETYPE_LOOKUP[self.shapeType] + + def __repr__(self): + return "Shape #{}: {}".format(self.__oid, self.shapeTypeName) + + +class _Record(list): + """ + A class to hold a record. Subclasses list to ensure compatibility with + former work and to reuse all the optimizations of the builtin list. + In addition to the list interface, the values of the record + can also be retrieved using the field's name. For example if the dbf contains + a field ID at position 0, the ID can be retrieved with the position, the field name + as a key, or the field name as an attribute. 
+ + >>> # Create a Record with one field, normally the record is created by the Reader class + >>> r = _Record({'ID': 0}, [0]) + >>> print(r[0]) + >>> print(r['ID']) + >>> print(r.ID) + """ + + def __init__(self, field_positions, values, oid=None): + """ + A Record should be created by the Reader class + + :param field_positions: A dict mapping field names to field positions + :param values: A sequence of values + :param oid: The object id, an int (optional) + """ + self.__field_positions = field_positions + if oid is not None: + self.__oid = oid + else: + self.__oid = -1 + list.__init__(self, values) + + def __getattr__(self, item): + """ + __getattr__ is called if an attribute is used that does + not exist in the normal sense. For example r=Record(...), r.ID + calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5) + :param item: The field name, used as attribute + :return: Value of the field + :raises: AttributeError, if item is not a field of the shapefile + and IndexError, if the field exists but the field's + corresponding value in the Record does not exist + """ + try: + if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() + raise AttributeError("_Record does not implement __setstate__") + index = self.__field_positions[item] + return list.__getitem__(self, index) + except KeyError: + raise AttributeError("{} is not a field name".format(item)) + except IndexError: + raise IndexError( + "{} found as a field but not enough values available.".format(item) + ) + + def __setattr__(self, key, value): + """ + Sets a value of a field attribute + :param key: The field name + :param value: the value of that field + :return: None + :raises: AttributeError, if key is not a field of the shapefile + """ + if key.startswith("_"): # Prevent infinite loop when setting mangled attribute + return list.__setattr__(self, key, value) + try: + index = self.__field_positions[key] + return list.__setitem__(self, index, value) + except KeyError: + raise 
AttributeError("{} is not a field name".format(key)) + + def __getitem__(self, item): + """ + Extends the normal list item access with + access using a fieldname + + For example r['ID'], r[0] + :param item: Either the position of the value or the name of a field + :return: the value of the field + """ + try: + return list.__getitem__(self, item) + except TypeError: + try: + index = self.__field_positions[item] + except KeyError: + index = None + if index is not None: + return list.__getitem__(self, index) + else: + raise IndexError('"{}" is not a field name and not an int'.format(item)) + + def __setitem__(self, key, value): + """ + Extends the normal list item access with + access using a fieldname + + For example r['ID']=2, r[0]=2 + :param key: Either the position of the value or the name of a field + :param value: the new value of the field + """ + try: + return list.__setitem__(self, key, value) + except TypeError: + index = self.__field_positions.get(key) + if index is not None: + return list.__setitem__(self, index, value) + else: + raise IndexError("{} is not a field name and not an int".format(key)) + + @property + def oid(self): + """The index position of the record in the original shapefile""" + return self.__oid + + def as_dict(self, date_strings=False): + """ + Returns this Record as a dictionary using the field names as keys + :return: dict + """ + dct = dict((f, self[i]) for f, i in self.__field_positions.items()) + if date_strings: + for k, v in dct.items(): + if isinstance(v, date): + dct[k] = "{:04d}{:02d}{:02d}".format(v.year, v.month, v.day) + return dct + + def __repr__(self): + return "Record #{}: {}".format(self.__oid, list(self)) + + def __dir__(self): + """ + Helps to show the field names in an interactive environment like IPython. 
+ See: http://ipython.readthedocs.io/en/stable/config/integrating.html + + :return: List of method names and fields + """ + default = list( + dir(type(self)) + ) # default list methods and attributes of this class + fnames = list( + self.__field_positions.keys() + ) # plus field names (random order if Python version < 3.6) + return default + fnames + + def __eq__(self, other): + if isinstance(other, self.__class__): + if self.__field_positions != other.__field_positions: + return False + return list.__eq__(self, other) + + +class ShapeRecord(object): + """A ShapeRecord object containing a shape along with its attributes. + Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" + + def __init__(self, shape=None, record=None): + self.shape = shape + self.record = record + + @property + def __geo_interface__(self): + return { + "type": "Feature", + "properties": self.record.as_dict(date_strings=True), + "geometry": None + if self.shape.shapeType == NULL + else self.shape.__geo_interface__, + } + + +class Shapes(list): + """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with + former work and to reuse all the optimizations of the builtin list. + In addition to the list interface, this also provides the GeoJSON __geo_interface__ + to return a GeometryCollection dictionary.""" + + def __repr__(self): + return "Shapes: {}".format(list(self)) + + @property + def __geo_interface__(self): + # Note: currently this will fail if any of the shapes are null-geometries + # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords + collection = { + "type": "GeometryCollection", + "geometries": [shape.__geo_interface__ for shape in self], + } + return collection + + +class ShapeRecords(list): + """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with + former work and to reuse all the optimizations of the builtin list. 
+ In addition to the list interface, this also provides the GeoJSON __geo_interface__ + to return a FeatureCollection dictionary.""" + + def __repr__(self): + return "ShapeRecords: {}".format(list(self)) + + @property + def __geo_interface__(self): + collection = { + "type": "FeatureCollection", + "features": [shaperec.__geo_interface__ for shaperec in self], + } + return collection + + +class ShapefileException(Exception): + """An exception to handle shapefile specific problems.""" + + pass + + +class Reader(object): + """Reads the three files of a shapefile as a unit or + separately. If one of the three files (.shp, .shx, + .dbf) is missing no exception is thrown until you try + to call a method that depends on that particular file. + The .shx index file is used if available for efficiency + but is not required to read the geometry from the .shp + file. The "shapefile" argument in the constructor is the + name of the file you want to open, and can be the path + to a shapefile on a local filesystem, inside a zipfile, + or a url. + + You can instantiate a Reader without specifying a shapefile + and then specify one later with the load() method. + + Only the shapefile headers are read upon loading. Content + within each file is only accessed when required and as + efficiently as possible. Shapefiles are usually not large + but they can be. 
def __init__(self, *args, **kwargs):
    """Open a shapefile for reading.

    The source can be supplied in one of two ways:

    * positionally, as a path (passed through ``pathlike_obj``) to a local
      shapefile, to a zipfile (optionally followed by the path of the
      shapefile inside the archive), or as an http url to either;
    * through the keyword arguments ``shp``, ``shx`` and/or ``dbf``, each
      being either a path or an object with a ``read`` method.

    The keyword arguments ``encoding`` (default "utf-8") and
    ``encodingErrors`` (default "strict") are stored on the instance.

    Raises ShapefileException for nested zipfiles, for a zipfile holding
    zero or several shapefiles when none was named, and when a zipfile or
    url yields neither a shp nor a dbf file.
    """
    self.shp = None
    self.shx = None
    self.dbf = None
    self._files_to_close = []
    self.shapeName = "Not specified"
    self._offsets = []
    self.shpLength = None
    self.numRecords = None
    self.numShapes = None
    self.fields = []
    self.__dbfHdrLength = 0
    self.__fieldLookup = {}
    self.encoding = kwargs.pop("encoding", "utf-8")
    self.encodingErrors = kwargs.pop("encodingErrors", "strict")
    # Header sent with every download request made below
    request_headers = {
        "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36"
    }
    # See if a shapefile name was passed as the first argument
    if len(args) > 0:
        path = pathlike_obj(args[0])
        if is_string(path):
            if ".zip" in path:
                # Shapefile is inside a zipfile
                if path.count(".zip") > 1:
                    # Multiple nested zipfiles
                    raise ShapefileException(
                        "Reading from multiple nested zipfiles is not supported: %s"
                        % path
                    )
                # Split into zipfile and shapefile paths
                if path.endswith(".zip"):
                    zpath = path
                    shapefile = None
                else:
                    zip_end = path.find(".zip") + 4
                    zpath = path[:zip_end]
                    shapefile = path[zip_end + 1 :]
                zipfileobj = None
                try:
                    # Create a zip file handle
                    if zpath.startswith("http"):
                        # Zipfile is from a url: download it into a temporary
                        # file and treat that as a normal zipfile
                        resp = urlopen(Request(zpath, headers=request_headers))
                        try:
                            zipfileobj = tempfile.NamedTemporaryFile(
                                mode="w+b", suffix=".zip", delete=True
                            )
                            zipfileobj.write(resp.read())
                        finally:
                            resp.close()
                        zipfileobj.seek(0)
                    else:
                        # Zipfile is from a file
                        zipfileobj = open(zpath, mode="rb")
                    # Open the zipfile archive
                    with zipfile.ZipFile(zipfileobj, "r") as archive:
                        if not shapefile:
                            # Only the zipfile path is given, inspect the
                            # contents to find the full shapefile path
                            shapefiles = [
                                name
                                for name in archive.namelist()
                                if name.endswith((".SHP", ".shp"))
                            ]
                            # The zipfile must contain exactly one shapefile
                            if len(shapefiles) == 0:
                                raise ShapefileException(
                                    "Zipfile does not contain any shapefiles"
                                )
                            elif len(shapefiles) == 1:
                                shapefile = shapefiles[0]
                            else:
                                # Adjacent literals instead of a backslash
                                # continuation, which leaked the source
                                # indentation into the message
                                raise ShapefileException(
                                    "Zipfile contains more than one shapefile: %s. "
                                    "Please specify the full path to the shapefile "
                                    "you would like to open." % shapefiles
                                )
                        # Try to extract file-like objects from zipfile
                        shapefile = os.path.splitext(shapefile)[0]  # root name
                        for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]:
                            fileobj = None
                            try:
                                member = archive.open(shapefile + "." + ext)
                                try:
                                    # Copy the member into a read+write
                                    # tempfile, deleted on close()
                                    fileobj = tempfile.NamedTemporaryFile(
                                        mode="w+b", delete=True
                                    )
                                    fileobj.write(member.read())
                                finally:
                                    member.close()
                                fileobj.seek(0)
                            except Exception:
                                # Best effort: a missing or unreadable member
                                # is skipped, but KeyboardInterrupt and
                                # SystemExit are no longer swallowed
                                if fileobj is not None:
                                    fileobj.close()
                                continue
                            setattr(self, ext.lower(), fileobj)
                            self._files_to_close.append(fileobj)
                finally:
                    # Close (and for downloads, delete) the zipfile handle
                    # even when something above raised
                    if zipfileobj is not None:
                        try:
                            zipfileobj.close()
                        except (IOError, OSError):
                            pass
                # Try to load shapefile
                if self.shp or self.dbf:
                    # Load and exit early
                    self.load()
                    return
                raise ShapefileException(
                    "No shp or dbf file found in zipfile: %s" % path
                )

            elif path.startswith("http"):
                # Shapefile is from a url: download each file to a temporary
                # file and treat as normal file-like objects
                urlinfo = urlparse(path)
                urlpath, _ = os.path.splitext(urlinfo[2])
                for ext in ["shp", "shx", "dbf"]:
                    _urlinfo = list(urlinfo)
                    _urlinfo[2] = urlpath + "." + ext
                    _path = urlunparse(_urlinfo)
                    try:
                        resp = urlopen(Request(_path, headers=request_headers))
                    except HTTPError:
                        # This component is not available at the url
                        continue
                    try:
                        # read+write tempfile, deleted on close()
                        fileobj = tempfile.NamedTemporaryFile(
                            mode="w+b", delete=True
                        )
                        fileobj.write(resp.read())
                    finally:
                        resp.close()
                    fileobj.seek(0)
                    setattr(self, ext, fileobj)
                    self._files_to_close.append(fileobj)
                if self.shp or self.dbf:
                    # Load and exit early
                    self.load()
                    return
                raise ShapefileException(
                    "No shp or dbf file found at url: %s" % path
                )

            else:
                # Local file path to a shapefile
                # Load and exit early
                self.load(path)
                return

    # Otherwise, load from separate shp/shx/dbf args (must be path or file-like)
    for ext, load_by_name in (
        ("shp", self.load_shp),
        ("shx", self.load_shx),
        ("dbf", self.load_dbf),
    ):
        if ext not in kwargs:
            continue
        source = kwargs[ext]
        if hasattr(source, "read"):
            # Copy into memory if the object cannot be rewound
            try:
                source.seek(0)
            except (NameError, io.UnsupportedOperation):
                source = io.BytesIO(source.read())
            setattr(self, ext, source)
        else:
            baseName, _ = os.path.splitext(source)
            load_by_name(baseName)

    # Load the files
    if self.shp or self.dbf:
        self.load()
def __len__(self):
    """Returns the number of shapes/records in the shapefile.

    The dbf record count is preferred. Without a dbf the shape count is
    taken from the shx header or, when there is no shx either, from a scan
    of the record headers in the shp file. The scan also fills in
    ``_offsets`` and restores the previous file position. With no file
    loaded the length is 0.
    """
    if self.dbf:
        if self.numRecords is None:
            self.__dbfHeader()
        return self.numRecords

    if not self.shp:
        # No file loaded yet, treat as 'empty' shapefile
        return 0

    if self.shx:
        if self.numShapes is None:
            self.__shxHeader()
        return self.numShapes

    if self.numShapes is not None:
        # Already counted by an earlier scan or iteration
        return self.numShapes

    # Index file not available: walk the record headers to count shapes
    geom = self.shp
    saved = geom.tell()
    geom.seek(0, 2)
    file_end = geom.tell()
    geom.seek(100)
    read_header = Struct(">2i").unpack
    starts = []
    cursor = geom.tell()
    while cursor < file_end:
        starts.append(cursor)
        # Only the 8-byte record header is read, content length is in words
        _, words = read_header(geom.read(8))
        cursor += 8 + 2 * words
        geom.seek(cursor)
    self.numShapes = len(starts)
    self._offsets = starts
    # Return to previous file position
    geom.seek(saved)
    return self.numShapes
def close(self):
    """Close the files this reader opened itself.

    Only the objects tracked in ``_files_to_close`` are closed.
    IOError raised while closing is ignored, and the tracking list is
    emptied afterwards.
    """
    for handle in self._files_to_close:
        if not hasattr(handle, "close"):
            continue
        try:
            handle.close()
        except IOError:
            pass
    self._files_to_close = []
def __restrictIndex(self, i):
    """Provides list-like handling of a record index with a clearer
    error message if the index is out of bounds.

    When ``numRecords`` is known (and non-zero), valid indexes are
    ``-numRecords <= i < numRecords``; negative values count from the end
    and are converted to their positive equivalent. Otherwise ``i`` is
    returned unchanged.

    Raises IndexError if the index is outside the valid range.
    """
    if self.numRecords:
        total = self.numRecords
        # The lower bound is -total, not -(total - 1): like a list, the
        # index -len refers to the first element
        if i >= total or i < -total:
            raise IndexError(
                "Shape or Record index: %s out of range.  Max index: %s"
                % (i, total - 1)
            )
        if i < 0:
            i += total
    return i
def __shxHeader(self):
    """Reads the header information from a .shx file.

    Sets ``numShapes`` from the file length stored at byte 24 of the
    header: the length in 16-bit words is converted to bytes, the 100-byte
    header is subtracted, and the rest is divided by the 8 bytes of one
    index record.

    Raises ShapefileException if no shx file is available.
    """
    shx = self.shx
    if not shx:
        raise ShapefileException(
            "Shapefile Reader requires a shapefile or file-like object. (no shx file found)"
        )
    # File length (16-bit word * 2 = bytes) - header length
    shx.seek(24)
    shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100
    # A header declaring less than 100 bytes would give a negative count,
    # which would make len() raise; treat it as an empty index instead
    self.numShapes = max(0, shxRecordLength // 8)
def iterShapes(self, bbox=None):
    """Returns a generator of shapes in a shapefile. Useful
    for handling large shapefiles.
    To only read shapes within a given spatial region, specify the 'bbox'
    arg as a list or tuple of xmin,ymin,xmax,ymax.

    When the number of shapes is not yet known, the file is read until its
    end; once fully consumed, ``numShapes`` and ``_offsets`` are updated
    from what was found.
    """
    handle = self.__getFileObj(self.shp)
    # The length stored in the shp header has been seen to be wrong, so the
    # real end of the file is measured instead.
    handle.seek(0, 2)
    file_end = handle.tell()
    handle.seek(100)

    if self.numShapes:
        # Shape count is known: read exactly that many shapes
        for oid in xrange(self.numShapes):
            geom = self.__shape(oid=oid, bbox=bbox)
            if geom:
                yield geom
        return

    # Unknown number of shapes: read until the end of the file and
    # remember where every record started
    starts = []
    cursor = handle.tell()
    while cursor < file_end:
        oid = len(starts)
        starts.append(cursor)
        geom = self.__shape(oid=oid, bbox=bbox)
        cursor = handle.tell()
        if geom:
            yield geom
    # Entire shp file consumed
    self.numShapes = len(starts)
    self._offsets = starts
def shapeRecords(self, fields=None, bbox=None):
    """Returns a list of combination geometry/attribute records for
    all records in a shapefile.

    The result is a ShapeRecords object constructed from the
    iterShapeRecords() generator, called with the same arguments.

    To only read some of the fields, specify the 'fields' arg as a
    list of one or more fieldnames.
    To only read entries within a given spatial region, specify the 'bbox'
    arg as a list or tuple of xmin,ymin,xmax,ymax.
    """
    return ShapeRecords(self.iterShapeRecords(fields=fields, bbox=bbox))
def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs):
    """Prepare a shapefile for writing.

    Either ``target`` (a file path, from which the .shp/.shx/.dbf paths are
    derived) or at least one of the keyword arguments ``shp``, ``shx`` and
    ``dbf`` must be given. A 100-byte placeholder header is written to the
    shp and shx files straight away.

    The keyword arguments ``encoding`` (default "utf-8") and
    ``encodingErrors`` (default "strict") are stored on the instance.

    Raises Exception if ``target`` is not a string or path-like object, or
    if no output was specified at all.
    """
    self.target = target
    self.autoBalance = autoBalance
    self.fields = []
    self.shapeType = shapeType
    self.shp = self.shx = self.dbf = None
    self._files_to_close = []
    if target:
        target = pathlike_obj(target)
        if not is_string(target):
            raise Exception(
                "The target filepath {} must be of type str/unicode or path-like, not {}.".format(
                    repr(target), type(target)
                )
            )
        stem = os.path.splitext(target)[0]
        self.shp = self.__getFileObj(stem + ".shp")
        self.shx = self.__getFileObj(stem + ".shx")
        self.dbf = self.__getFileObj(stem + ".dbf")
    else:
        supplied = [(ext, kwargs.get(ext)) for ext in ("shp", "shx", "dbf")]
        if not any(source for _, source in supplied):
            raise Exception(
                "Either the target filepath, or any of shp, shx, or dbf must be set to create a shapefile."
            )
        for ext, source in supplied:
            if source:
                setattr(self, ext, self.__getFileObj(source))
    # Initiate with empty headers, to be finalized upon closing
    for handle in (self.shp, self.shx):
        if handle:
            handle.write(b"9" * 100)
    # Geometry record offsets and lengths for writing shx file.
    self.recNum = 0
    self.shpNum = 0
    self._bbox = None
    self._zbox = None
    self._mbox = None
    # Use deletion flags in dbf? Default is false (0). Note: Currently has no effect, records should NOT contain deletion flags.
    self.deletionFlag = 0
    # Encoding
    self.encoding = kwargs.pop("encoding", "utf-8")
    self.encodingErrors = kwargs.pop("encodingErrors", "strict")
def __getFileObj(self, f):
    """Safety handler to verify file-like objects.

    An object with a ``write`` method is returned unchanged. Anything else
    is treated as a file path: missing parent directories are created, the
    file is opened in "wb+" mode and registered in ``_files_to_close`` so
    that close() closes it.

    Raises ShapefileException if ``f`` is empty or None.
    """
    if not f:
        raise ShapefileException("No file-like object available.")
    if hasattr(f, "write"):
        return f
    parent = os.path.split(f)[0]
    if parent and not os.path.exists(parent):
        try:
            os.makedirs(parent)
        except OSError:
            # The directory may have been created by someone else between
            # the check and the call; only fail if it is really missing
            if not os.path.isdir(parent):
                raise
    fp = open(f, "wb+")
    self._files_to_close.append(fp)
    return fp
def __zbox(self, s):
    """Return [zmin, zmax] for shape ``s`` and widen the file-wide z range.

    Points without a third coordinate count as z = 0. The running range in
    ``_zbox`` is extended to include this shape, or set to this shape's
    range if nothing was recorded yet.
    """
    heights = [pt[2] if len(pt) > 2 else 0 for pt in s.points]
    box = [min(heights), max(heights)]
    known = self._zbox
    if known:
        # compare with existing
        self._zbox = [min(box[0], known[0]), max(box[1], known[1])]
    else:
        # first time zbox is being set
        self._zbox = box
    return box
def __shapefileHeader(self, fileObj, headerType="shp"):
    """Writes the specified header type to the specified file-like object.

    The same 100-byte layout is used for "shp" and "shx"; only the file
    length field differs. The header is written from position 0 and holds
    the file code, file length in 16-bit words, version, shape type,
    bounding box, and the z and m ranges. Missing or non-applicable ranges
    are written as zeros. A shapeType of None is replaced by NULL.

    Raises ShapefileException if the bounding box or the z/m ranges cannot
    be packed as floats.
    """
    out = self.__getFileObj(fileObj)
    out.seek(0)
    # File code, Unused bytes
    out.write(pack(">6i", 9994, 0, 0, 0, 0, 0))
    # File length (Bytes / 2 = 16-bit words)
    if headerType == "shp":
        out.write(pack(">i", self.__shpFileLength()))
    elif headerType == "shx":
        out.write(pack(">i", (100 + self.shpNum * 8) // 2))
    # Version, Shape type
    if self.shapeType is None:
        self.shapeType = NULL
    out.write(pack("<2i", 1000, self.shapeType))
    # The shapefile's bounding box (lower left, upper right)
    if self.shapeType == 0:
        out.write(pack("<4d", 0, 0, 0, 0))
    else:
        try:
            extent = self.bbox()
            if extent is None:
                # No valid geometries were written; zeros are used for the
                # 'unspecified' bbox, as in previous versions.
                extent = [0, 0, 0, 0]
            out.write(pack("<4d", *extent))
        except error:
            raise ShapefileException(
                "Failed to write shapefile bounding box. Floats required."
            )
    # Elevation: only Z types carry a z range, everything else gets zeros
    zrange = self.zbox() if self.shapeType in (11, 13, 15, 18) else None
    if zrange is None:
        zrange = [0, 0]
    # Measure: Z and M types carry an m range, everything else gets zeros
    has_m = self.shapeType in (11, 13, 15, 18, 21, 23, 25, 28, 31)
    mrange = self.mbox() if has_m else None
    if mrange is None:
        mrange = [0, 0]
    # Try writing
    try:
        out.write(pack("<4d", zrange[0], zrange[1], mrange[0], mrange[1]))
    except error:
        raise ShapefileException(
            "Failed to write shapefile elevation and measure values. Floats required."
        )
+ % (s.shapeType, self.shapeType) + ) + f.write(pack(" 2 else 0)) for p in s.points] + except error: + raise ShapefileException( + "Failed to write elevation values for record %s. Expected floats." + % self.shpNum + ) + # Write m extremes and values + # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA + # Note: missing m values are autoset to NODATA. + if s.shapeType in (13, 15, 18, 23, 25, 28, 31): + try: + f.write(pack("<2d", *self.__mbox(s))) + except error: + raise ShapefileException( + "Failed to write measure extremes for record %s. Expected floats" + % self.shpNum + ) + try: + if hasattr(s, "m"): + # if m values are stored in attribute + # fmt: off + f.write( + pack( + "<%sd" % len(s.m), + *[m if m is not None else NODATA for m in s.m] + ) + ) + # fmt: on + else: + # if m values are stored as 3rd/4th dimension + # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) + mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2 + [ + f.write( + pack( + " mpos and p[mpos] is not None + else NODATA, + ) + ) + for p in s.points + ] + except error: + raise ShapefileException( + "Failed to write measure values for record %s. Expected floats" + % self.shpNum + ) + # Write a single point + if s.shapeType in (1, 11, 21): + try: + f.write(pack("<2d", s.points[0][0], s.points[0][1])) + except error: + raise ShapefileException( + "Failed to write point for record %s. Expected floats." + % self.shpNum + ) + # Write a single Z value + # Note: missing z values are autoset to 0, but not sure if this is ideal. 
def record(self, *recordList, **recordDict):
    """Creates a dbf attribute record.

    Values are given either positionally, in field order, or as keyword
    arguments keyed by field name. Fields must have been added beforehand.
    Missing positional values, unknown or missing keyword fields and None
    keyword values are all written as empty strings. A 'DeletionFlag'
    field, if defined, is ignored. With autoBalance enabled, missing
    shapes are balanced first when there are more records than shapes.
    """
    # Balance if already not balanced
    if self.autoBalance and self.recNum > self.shpNum:
        self.balance()

    names = [fld[0] for fld in self.fields if fld[0] != "DeletionFlag"]
    if recordList:
        values = list(recordList)
        # pad short records with blanks (a negative count adds nothing)
        values.extend([""] * (len(names) - len(values)))
    elif recordDict:
        values = []
        for name in names:
            given = recordDict.get(name)
            values.append("" if given is None else given)
    else:
        # Blank fields for empty record
        values = [""] * len(names)
    self.__dbfRecord(values)
== 0: + # first records, so all fields should be set + # allowing us to write the dbf header + # cannot change the fields after this point + self.__dbfHeader() + # first byte of the record is deletion flag, always disabled + f.write(b" ") + # begin + self.recNum += 1 + fields = ( + field for field in self.fields if field[0] != "DeletionFlag" + ) # ignore deletionflag field in case it was specified + for (fieldName, fieldType, size, deci), value in zip(fields, record): + # write + fieldType = fieldType.upper() + size = int(size) + if fieldType in ("N", "F"): + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + value = b"*" * size # QGIS NULL + elif not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + value = int(float(value)) + value = format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size + else: + value = float(value) + value = format(value, ".%sf" % deci)[:size].rjust( + size + ) # caps the size if exceeds the field size + elif fieldType == "D": + # date: 8 bytes - date stored as a string in the format YYYYMMDD. + if isinstance(value, date): + value = "{:04d}{:02d}{:02d}".format( + value.year, value.month, value.day + ) + elif isinstance(value, list) and len(value) == 3: + value = "{:04d}{:02d}{:02d}".format(*value) + elif value in MISSING: + value = b"0" * 8 # QGIS NULL for date type + elif is_string(value) and len(value) == 8: + pass # value is already a date string + else: + raise ShapefileException( + "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." + ) + elif fieldType == "L": + # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
def balance(self):
    """Adds corresponding empty attributes or null geometry records depending
    on which type of record was created to make sure all three files
    are in synch."""
    # Fewer shapes than records: pad with null shapes
    while self.shpNum < self.recNum:
        self.null()
    # Fewer records than shapes: pad with blank records
    while self.shpNum > self.recNum:
        self.record()
+ Points is a list of xy values.""" + shapeType = MULTIPOINT + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=points, shapeType=shapeType) + + def multipointm(self, points): + """Creates a MULTIPOINTM shape. + Points is a list of xym values. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = MULTIPOINTM + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=points, shapeType=shapeType) + + def multipointz(self, points): + """Creates a MULTIPOINTZ shape. + Points is a list of xyzm values. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = MULTIPOINTZ + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=points, shapeType=shapeType) + + def line(self, lines): + """Creates a POLYLINE shape. + Lines is a collection of lines, each made up of a list of xy values.""" + shapeType = POLYLINE + self._shapeparts(parts=lines, shapeType=shapeType) + + def linem(self, lines): + """Creates a POLYLINEM shape. + Lines is a collection of lines, each made up of a list of xym values. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYLINEM + self._shapeparts(parts=lines, shapeType=shapeType) + + def linez(self, lines): + """Creates a POLYLINEZ shape. + Lines is a collection of lines, each made up of a list of xyzm values. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYLINEZ + self._shapeparts(parts=lines, shapeType=shapeType) + + def poly(self, polys): + """Creates a POLYGON shape. 
+ Polys is a collection of polygons, each made up of a list of xy values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction.""" + shapeType = POLYGON + self._shapeparts(parts=polys, shapeType=shapeType) + + def polym(self, polys): + """Creates a POLYGONM shape. + Polys is a collection of polygons, each made up of a list of xym values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYGONM + self._shapeparts(parts=polys, shapeType=shapeType) + + def polyz(self, polys): + """Creates a POLYGONZ shape. + Polys is a collection of polygons, each made up of a list of xyzm values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYGONZ + self._shapeparts(parts=polys, shapeType=shapeType) + + def multipatch(self, parts, partTypes): + """Creates a MULTIPATCH shape. + Parts is a collection of 3D surface patches, each made up of a list of xyzm values. + PartTypes is a list of types that define each of the surface patches. + The types can be any of the following module constants: TRIANGLE_STRIP, + TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. + If the z (elevation) value is not included, it defaults to 0. 
+ If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = MULTIPATCH + polyShape = Shape(shapeType) + polyShape.parts = [] + polyShape.points = [] + for part in parts: + # set part index position + polyShape.parts.append(len(polyShape.points)) + # add points + for point in part: + # Ensure point is list + if not isinstance(point, list): + point = list(point) + polyShape.points.append(point) + polyShape.partTypes = partTypes + # write the shape + self.shape(polyShape) + + def _shapeparts(self, parts, shapeType): + """Internal method for adding a shape that has multiple collections of points (parts): + lines, polygons, and multipoint shapes. + """ + polyShape = Shape(shapeType) + polyShape.parts = [] + polyShape.points = [] + # Make sure polygon rings (parts) are closed + if shapeType in (5, 15, 25, 31): + for part in parts: + if part[0] != part[-1]: + part.append(part[0]) + # Add points and part indexes + for part in parts: + # set part index position + polyShape.parts.append(len(polyShape.points)) + # add points + for point in part: + # Ensure point is list + if not isinstance(point, list): + point = list(point) + polyShape.points.append(point) + # write the shape + self.shape(polyShape) + + def field(self, name, fieldType="C", size="50", decimal=0): + """Adds a dbf field descriptor to the shapefile.""" + if fieldType == "D": + size = "8" + decimal = 0 + elif fieldType == "L": + size = "1" + decimal = 0 + if len(self.fields) >= 2046: + raise ShapefileException( + "Shapefile Writer reached maximum number of fields: 2046." 
+ ) + self.fields.append((name, fieldType, size, decimal)) + + +# Begin Testing +def _get_doctests(): + import doctest + + doctest.NORMALIZE_WHITESPACE = 1 + + # run tests + with open("README.md", "rb") as fobj: + tests = doctest.DocTestParser().get_doctest( + string=fobj.read().decode("utf8").replace("\r\n", "\n"), + globs={}, + name="README", + filename="README.md", + lineno=0, + ) + + return tests + + +def _filter_network_doctests(examples, include_network=False, include_non_network=True): + globals_from_network_doctests = set() + + if not (include_network or include_non_network): + return + + examples_it = iter(examples) + + yield next(examples_it) + + for example in examples_it: + # Track variables in doctest shell sessions defined from commands + # that poll remote URLs, to skip subsequent commands until all + # such dependent variables are reassigned. + + if 'sf = shapefile.Reader("https://' in example.source: + globals_from_network_doctests.add("sf") + if include_network: + yield example + continue + + lhs = example.source.partition("=")[0] + + for target in lhs.split(","): + target = target.strip() + if target in globals_from_network_doctests: + globals_from_network_doctests.remove(target) + + # Non-network tests dependent on the network tests. 
+ if globals_from_network_doctests: + if include_network: + yield example + continue + + if not include_non_network: + continue + + yield example + + +def _replace_remote_url( + old_url, + # Default port of Python http.server and Python 2's SimpleHttpServer + port=8000, + scheme="http", + netloc="localhost", + path=None, + params="", + query="", + fragment="", +): + old_parsed = urlparse(old_url) + + # Strip subpaths, so an artefacts + # repo or file tree can be simpler and flat + if path is None: + path = old_parsed.path.rpartition("/")[2] + + if port not in (None, ""): + netloc = "%s:%s" % (netloc, port) + + new_parsed = old_parsed._replace( + scheme=scheme, + netloc=netloc, + path=path, + params=params, + query=query, + fragment=fragment, + ) + + new_url = urlunparse(new_parsed) if PYTHON3 else urlunparse(list(new_parsed)) + return new_url + + +def _test(args=sys.argv[1:], verbosity=0): + if verbosity == 0: + print("Getting doctests...") + + import doctest + import re + + doctest.NORMALIZE_WHITESPACE = 1 + + tests = _get_doctests() + + if len(args) >= 2 and args[0] == "-m": + if verbosity == 0: + print("Filtering doctests...") + tests.examples = list( + _filter_network_doctests( + tests.examples, + include_network=args[1] == "network", + include_non_network=args[1] == "not network", + ) + ) + + if REPLACE_REMOTE_URLS_WITH_LOCALHOST: + if verbosity == 0: + print("Replacing remote urls with http://localhost in doctests...") + + for example in tests.examples: + match_url_str_literal = re.search(r'"(https://.*)"', example.source) + if not match_url_str_literal: + continue + old_url = match_url_str_literal.group(1) + new_url = _replace_remote_url(old_url) + example.source = example.source.replace(old_url, new_url) + + class Py23DocChecker(doctest.OutputChecker): + def check_output(self, want, got, optionflags): + if sys.version_info[0] == 2: + got = re.sub("u'(.*?)'", "'\\1'", got) + got = re.sub('u"(.*?)"', '"\\1"', got) + res = 
doctest.OutputChecker.check_output(self, want, got, optionflags) + return res + + def summarize(self): + doctest.OutputChecker.summarize(True) + + runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) + + if verbosity == 0: + print("Running %s doctests..." % len(tests.examples)) + failure_count, test_count = runner.run(tests) + + # print results + if verbosity: + runner.summarize(True) + else: + if failure_count == 0: + print("All test passed successfully") + elif failure_count > 0: + runner.summarize(verbosity) + + return failure_count + + +if __name__ == "__main__": + """ + Doctests are contained in the file 'README.md', and are tested using the built-in + testing libraries. + """ + failure_count = _test() + sys.exit(failure_count) diff --git a/test_shapefile.py b/test_shapefile.py index 93bb6049..1b7182f9 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -1,203 +1,436 @@ """ This module tests the functionality of shapefile.py. """ -# std lib imports + +import datetime +import json import os.path -import sys -if sys.version_info.major == 3: + +try: from pathlib import Path +except ImportError: + # pathlib2 is a dependency of pytest >= 3.7 + from pathlib2 import Path # third party imports import pytest -import json -import datetime -if sys.version_info.major == 2: - # required by pytest for python <36 - from pathlib2 import Path # our imports import shapefile - # define various test shape tuples of (type, points, parts indexes, and expected geo interface output) -geo_interface_tests = [ (shapefile.POINT, # point - [(1,1)], - [], - {'type':'Point','coordinates':(1,1)} - ), - (shapefile.MULTIPOINT, # multipoint - [(1,1),(2,1),(2,2)], - [], - {'type':'MultiPoint','coordinates':[(1,1),(2,1),(2,2)]} - ), - (shapefile.POLYLINE, # single linestring - [(1,1),(2,1)], - [0], - {'type':'LineString','coordinates':[(1,1),(2,1)]} - ), - (shapefile.POLYLINE, # multi linestring - [(1,1),(2,1), # line 1 - (10,10),(20,10)], # line 2 - [0,2], - 
{'type':'MultiLineString','coordinates':[ - [(1,1),(2,1)], # line 1 - [(10,10),(20,10)] # line 2 - ]} - ), - (shapefile.POLYGON, # single polygon, no holes - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior - ], - [0], - {'type':'Polygon','coordinates':[ - [(1,1),(1,9),(9,9),(9,1),(1,1)], - ]} - ), - (shapefile.POLYGON, # single polygon, holes (ordered) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1 - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 2 - ], - [0,5,5+5], - {'type':'Polygon','coordinates':[ - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ]} - ), - (shapefile.POLYGON, # single polygon, holes (unordered) - [ - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1 - (1,1),(1,9),(9,9),(9,1),(1,1), # exterior - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 2 - ], - [0,5,5+5], - {'type':'Polygon','coordinates':[ - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ]} - ), - (shapefile.POLYGON, # multi polygon, no holes - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior - (11,11),(11,19),(19,19),(19,11),(11,11), # exterior - ], - [0,5], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], - ], - [ # poly 2 - [(11,11),(11,19),(19,19),(19,11),(11,11)], - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, holes (unordered) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (11,11),(11,19),(19,19),(19,11),(11,11), # exterior 2 - (12,12),(14,12),(14,14),(12,14),(12,12), # hole 2.1 - (15,15),(17,15),(17,17),(15,17),(15,15), # hole 2.2 - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1.1 - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 1.2 - ], - [0,5,10,15,20,25], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ], - [ # poly 2 - 
[(11,11),(11,19),(19,19),(19,11),(11,11)], # exterior - [(12,12),(14,12),(14,14),(12,14),(12,12)], # hole 1 - [(15,15),(17,15),(17,17),(15,17),(15,15)], # hole 2 - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (3,3),(3,7),(7,7),(7,3),(3,3), # exterior 2 - (4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5), # exterior 3 - (4,4),(6,4),(6,6),(4,6),(4,4), # hole 2.1 - (2,2),(8,2),(8,8),(2,8),(2,2), # hole 1.1 - ], - [0,5,10,15,20], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior 1 - [(2,2),(8,2),(8,8),(2,8),(2,2)], # hole 1.1 - ], - [ # poly 2 - [(3,3),(3,7),(7,7),(7,3),(3,3)], # exterior 2 - [(4,4),(6,4),(6,6),(4,6),(4,4)], # hole 2.1 - ], - [ # poly 3 - [(4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5)], # exterior 3 - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered and tricky holes designed to throw off ring_sample() test) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (3,3),(3,7),(7,7),(7,3),(3,3), # exterior 2 - (4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5), # exterior 3 - (4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4), # hole 2.1 (hole has duplicate coords) - (2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2), # hole 1.1 (hole coords form straight line and starts in concave orientation) - ], - [0,5,10,15,20+3], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior 1 - [(2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2)], # hole 1.1 - ], - [ # poly 2 - [(3,3),(3,7),(7,7),(7,3),(3,3)], # exterior 2 - [(4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4)], # hole 2.1 - ], - [ # poly 3 - [(4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5)], # exterior 3 - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, holes incl orphaned holes (unordered), should raise warning - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - 
(11,11),(11,19),(19,19),(19,11),(11,11), # exterior 2 - (12,12),(14,12),(14,14),(12,14),(12,12), # hole 2.1 - (15,15),(17,15),(17,17),(15,17),(15,15), # hole 2.2 - (95,95),(97,95),(97,97),(95,97),(95,95), # hole x.1 (orphaned hole, should be interpreted as exterior) - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1.1 - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 1.2 - ], - [0,5,10,15,20,25,30], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ], - [ # poly 2 - [(11,11),(11,19),(19,19),(19,11),(11,11)], # exterior - [(12,12),(14,12),(14,14),(12,14),(12,12)], # hole 1 - [(15,15),(17,15),(17,17),(15,17),(15,15)], # hole 2 - ], - [ # poly 3 (orphaned hole) - [(95,95),(97,95),(97,97),(95,97),(95,95)], # exterior - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, exteriors with wrong orientation (be nice and interpret as such), should raise warning - [(1,1),(9,1),(9,9),(1,9),(1,1), # exterior with hole-orientation - (11,11),(19,11),(19,19),(11,19),(11,11), # exterior with hole-orientation - ], - [0,5], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(9,1),(9,9),(1,9),(1,1)], - ], - [ # poly 2 - [(11,11),(19,11),(19,19),(11,19),(11,11)], - ], - ]} - ), - ] +geo_interface_tests = [ + ( + shapefile.POINT, # point + [(1, 1)], + [], + {"type": "Point", "coordinates": (1, 1)}, + ), + ( + shapefile.MULTIPOINT, # multipoint + [(1, 1), (2, 1), (2, 2)], + [], + {"type": "MultiPoint", "coordinates": [(1, 1), (2, 1), (2, 2)]}, + ), + ( + shapefile.POLYLINE, # single linestring + [(1, 1), (2, 1)], + [0], + {"type": "LineString", "coordinates": [(1, 1), (2, 1)]}, + ), + ( + shapefile.POLYLINE, # multi linestring + [ + (1, 1), + (2, 1), # line 1 + (10, 10), + (20, 10), + ], # line 2 + [0, 2], + { + "type": "MultiLineString", + "coordinates": [ + [(1, 1), (2, 1)], # line 1 + [(10, 10), (20, 10)], # line 2 + ], + }, + ), + ( + shapefile.POLYGON, 
# single polygon, no holes + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + ], + [0], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, holes (ordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 2 + ], + [0, 5, 5 + 5], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, holes (unordered) + [ + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1 + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 2 + ], + [0, 5, 5 + 5], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, no holes + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior + ], + [0, 5], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, holes (unordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior 2 + (12, 12), + (14, 12), + (14, 14), + (12, 14), + (12, 12), # hole 2.1 + (15, 15), + (17, 15), + (17, 17), + (15, 17), + (15, 15), # hole 2.2 + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1.1 + (5, 5), + (7, 5), 
+ (7, 7), + (5, 7), + (5, 5), # hole 1.2 + ], + [0, 5, 10, 15, 20, 25], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], # exterior + [(12, 12), (14, 12), (14, 14), (12, 14), (12, 12)], # hole 1 + [(15, 15), (17, 15), (17, 17), (15, 17), (15, 15)], # hole 2 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (3, 3), + (3, 7), + (7, 7), + (7, 3), + (3, 3), # exterior 2 + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), # exterior 3 + (4, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), # hole 2.1 + (2, 2), + (8, 2), + (8, 8), + (2, 8), + (2, 2), # hole 1.1 + ], + [0, 5, 10, 15, 20], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior 1 + [(2, 2), (8, 2), (8, 8), (2, 8), (2, 2)], # hole 1.1 + ], + [ # poly 2 + [(3, 3), (3, 7), (7, 7), (7, 3), (3, 3)], # exterior 2 + [(4, 4), (6, 4), (6, 6), (4, 6), (4, 4)], # hole 2.1 + ], + [ # poly 3 + [ + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), + ], # exterior 3 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered and tricky holes designed to throw off ring_sample() test) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (3, 3), + (3, 7), + (7, 7), + (7, 3), + (3, 3), # exterior 2 + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), # exterior 3 + (4, 4), + (4, 4), + (6, 4), + (6, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), # hole 2.1 (hole has duplicate coords) + (2, 2), + (3, 3), + (4, 2), + (8, 2), + (8, 8), + (4, 8), + (2, 8), + (2, 4), + ( + 2, + 2, + ), # hole 1.1 (hole coords 
form straight line and starts in concave orientation) + ], + [0, 5, 10, 15, 20 + 3], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior 1 + [ + (2, 2), + (3, 3), + (4, 2), + (8, 2), + (8, 8), + (4, 8), + (2, 8), + (2, 4), + (2, 2), + ], # hole 1.1 + ], + [ # poly 2 + [(3, 3), (3, 7), (7, 7), (7, 3), (3, 3)], # exterior 2 + [ + (4, 4), + (4, 4), + (6, 4), + (6, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), + ], # hole 2.1 + ], + [ # poly 3 + [ + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), + ], # exterior 3 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, holes incl orphaned holes (unordered), should raise warning + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior 2 + (12, 12), + (14, 12), + (14, 14), + (12, 14), + (12, 12), # hole 2.1 + (15, 15), + (17, 15), + (17, 17), + (15, 17), + (15, 15), # hole 2.2 + (95, 95), + (97, 95), + (97, 97), + (95, 97), + (95, 95), # hole x.1 (orphaned hole, should be interpreted as exterior) + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1.1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 1.2 + ], + [0, 5, 10, 15, 20, 25, 30], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], # exterior + [(12, 12), (14, 12), (14, 14), (12, 14), (12, 12)], # hole 1 + [(15, 15), (17, 15), (17, 17), (15, 17), (15, 15)], # hole 2 + ], + [ # poly 3 (orphaned hole) + [(95, 95), (97, 95), (97, 97), (95, 97), (95, 95)], # exterior + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, exteriors with wrong orientation (be nice and interpret as such), should raise warning + [ + (1, 1), + (9, 1), + (9, 9), + 
(1, 9), + (1, 1), # exterior with hole-orientation + (11, 11), + (19, 11), + (19, 19), + (11, 19), + (11, 11), # exterior with hole-orientation + ], + [0, 5], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (9, 1), (9, 9), (1, 9), (1, 1)], + ], + [ # poly 2 + [(11, 11), (19, 11), (19, 19), (11, 19), (11, 11)], + ], + ], + }, + ), +] + def test_empty_shape_geo_interface(): """ @@ -207,7 +440,8 @@ def test_empty_shape_geo_interface(): """ shape = shapefile.Shape() with pytest.raises(Exception): - shape.__geo_interface__ + getattr(shape, "__geo_interface__") + @pytest.mark.parametrize("typ,points,parts,expected", geo_interface_tests) def test_expected_shape_geo_interface(typ, points, parts, expected): @@ -223,22 +457,22 @@ def test_expected_shape_geo_interface(typ, points, parts, expected): def test_reader_geo_interface(): with shapefile.Reader("shapefiles/blockgroups") as r: geoj = r.__geo_interface__ - assert geoj['type'] == 'FeatureCollection' - assert 'bbox' in geoj + assert geoj["type"] == "FeatureCollection" + assert "bbox" in geoj assert json.dumps(geoj) def test_shapes_geo_interface(): with shapefile.Reader("shapefiles/blockgroups") as r: geoj = r.shapes().__geo_interface__ - assert geoj['type'] == 'GeometryCollection' + assert geoj["type"] == "GeometryCollection" assert json.dumps(geoj) def test_shaperecords_geo_interface(): with shapefile.Reader("shapefiles/blockgroups") as r: geoj = r.shapeRecords().__geo_interface__ - assert geoj['type'] == 'FeatureCollection' + assert geoj["type"] == "FeatureCollection" assert json.dumps(geoj) @@ -253,34 +487,49 @@ def test_reader_url(): """ Assert that Reader can open shapefiles from a url. """ + + # Allow testing loading of shapefiles from a url on localhost (to avoid + # overloading external servers, and associated spurious test failures). 
+ # A suitable repo of test files, and a localhost server setup is + # defined in ./.github/actions/test/actions.yml + if shapefile.REPLACE_REMOTE_URLS_WITH_LOCALHOST: + + def Reader(url): + new_url = shapefile._replace_remote_url(url) + print("repr(new_url): %s" % repr(new_url)) + return shapefile.Reader(new_url) + else: + print("Using plain Reader") + Reader = shapefile.Reader + # test with extension url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true" - with shapefile.Reader(url) as sf: - for recShape in sf.iterShapeRecords(): + with Reader(url) as sf: + for __recShape in sf.iterShapeRecords(): pass - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test without extension url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries?raw=true" - with shapefile.Reader(url) as sf: - for recShape in sf.iterShapeRecords(): + with Reader(url) as sf: + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test no files found url = "https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/README.md" with pytest.raises(shapefile.ShapefileException): - with shapefile.Reader(url) as sf: + with Reader(url) as sf: pass # test reading zipfile from url - url = "https://biogeo.ucdavis.edu/data/diva/rrd/NIC_rrd.zip" - with shapefile.Reader(url) as sf: - for recShape in sf.iterShapeRecords(): + url = "https://github.com/JamesParrott/PyShp_test_shapefile/raw/main/gis_osm_natural_a_free_1.zip" + with Reader(url) as sf: + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True def 
test_reader_zip(): @@ -289,29 +538,33 @@ def test_reader_zip(): """ # test reading zipfile only with shapefile.Reader("shapefiles/blockgroups.zip") as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True - + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True + # test require specific path when reading multi-shapefile zipfile with pytest.raises(shapefile.ShapefileException): with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip") as sf: pass # test specifying the path when reading multi-shapefile zipfile (with extension) - with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp") as sf: - for recShape in sf.iterShapeRecords(): + with shapefile.Reader( + "shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp" + ) as sf: + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test specifying the path when reading multi-shapefile zipfile (without extension) - with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2") as sf: - for recShape in sf.iterShapeRecords(): + with shapefile.Reader( + "shapefiles/blockgroups_multishapefile.zip/blockgroups2" + ) as sf: + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test raising error when can't find shapefile inside zipfile with pytest.raises(shapefile.ShapefileException): @@ -347,9 +600,9 @@ def test_reader_close_filelike(): """ # note uses an actual shapefile from # the projects "shapefiles" directory - shp = open("shapefiles/blockgroups.shp", mode='rb') - shx = open("shapefiles/blockgroups.shx", mode='rb') - dbf = 
open("shapefiles/blockgroups.dbf", mode='rb') + shp = open("shapefiles/blockgroups.shp", mode="rb") + shx = open("shapefiles/blockgroups.shx", mode="rb") + dbf = open("shapefiles/blockgroups.dbf", mode="rb") sf = shapefile.Reader(shp=shp, shx=shx, dbf=dbf) sf.close() @@ -390,9 +643,9 @@ def test_reader_context_filelike(): """ # note uses an actual shapefile from # the projects "shapefiles" directory - shp = open("shapefiles/blockgroups.shp", mode='rb') - shx = open("shapefiles/blockgroups.shx", mode='rb') - dbf = open("shapefiles/blockgroups.dbf", mode='rb') + shp = open("shapefiles/blockgroups.shp", mode="rb") + shx = open("shapefiles/blockgroups.shx", mode="rb") + dbf = open("shapefiles/blockgroups.dbf", mode="rb") with shapefile.Reader(shp=shp, shx=shx, dbf=dbf) as sf: pass @@ -411,9 +664,9 @@ def test_reader_shapefile_type(): is returned correctly. """ with shapefile.Reader("shapefiles/blockgroups") as sf: - assert sf.shapeType is 5 # 5 means Polygon - assert sf.shapeType is shapefile.POLYGON - assert sf.shapeTypeName is "POLYGON" + assert sf.shapeType == 5 # 5 means Polygon + assert sf.shapeType == shapefile.POLYGON + assert sf.shapeTypeName == "POLYGON" def test_reader_shapefile_length(): @@ -429,9 +682,9 @@ def test_reader_shapefile_length(): def test_shape_metadata(): with shapefile.Reader("shapefiles/blockgroups") as sf: shape = sf.shape(0) - assert shape.shapeType is 5 # Polygon - assert shape.shapeType is shapefile.POLYGON - assert sf.shapeTypeName is "POLYGON" + assert shape.shapeType == 5 # Polygon + assert shape.shapeType == shapefile.POLYGON + assert sf.shapeTypeName == "POLYGON" def test_reader_fields(): @@ -446,10 +699,10 @@ def test_reader_fields(): assert isinstance(fields, list) field = fields[0] - assert isinstance(field[0], str) # field name - assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type - assert isinstance(field[2], int) # field length - assert isinstance(field[3], int) # decimal length + assert isinstance(field[0], str) # 
field name + assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type + assert isinstance(field[2], int) # field length + assert isinstance(field[3], int) # decimal length def test_reader_shapefile_extension_ignored(): @@ -485,7 +738,7 @@ def test_reader_dbf_only(): with shapefile.Reader(dbf="shapefiles/blockgroups.dbf") as sf: assert len(sf) == 663 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_shp_shx_only(): @@ -494,24 +747,28 @@ def test_reader_shp_shx_only(): shp and shx argument to the shapefile reader reads just the shp and shx file. """ - with shapefile.Reader(shp="shapefiles/blockgroups.shp", shx="shapefiles/blockgroups.shx") as sf: + with shapefile.Reader( + shp="shapefiles/blockgroups.shp", shx="shapefiles/blockgroups.shx" + ) as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 + - def test_reader_shp_dbf_only(): """ Assert that specifying just the shp and shx argument to the shapefile reader reads just the shp and dbf file. 
""" - with shapefile.Reader(shp="shapefiles/blockgroups.shp", dbf="shapefiles/blockgroups.dbf") as sf: + with shapefile.Reader( + shp="shapefiles/blockgroups.shp", dbf="shapefiles/blockgroups.dbf" + ) as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_shp_only(): @@ -523,7 +780,7 @@ def test_reader_shp_only(): with shapefile.Reader(shp="shapefiles/blockgroups.shp") as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 def test_reader_filelike_dbf_only(): @@ -535,7 +792,7 @@ def test_reader_filelike_dbf_only(): with shapefile.Reader(dbf=open("shapefiles/blockgroups.dbf", "rb")) as sf: assert len(sf) == 663 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_filelike_shp_shx_only(): @@ -544,10 +801,13 @@ def test_reader_filelike_shp_shx_only(): shp and shx argument to the shapefile reader reads just the shp and shx file. """ - with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb"), shx=open("shapefiles/blockgroups.shx", "rb")) as sf: + with shapefile.Reader( + shp=open("shapefiles/blockgroups.shp", "rb"), + shx=open("shapefiles/blockgroups.shx", "rb"), + ) as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 def test_reader_filelike_shp_dbf_only(): @@ -556,12 +816,15 @@ def test_reader_filelike_shp_dbf_only(): shp and shx argument to the shapefile reader reads just the shp and dbf file. 
""" - with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb"), dbf=open("shapefiles/blockgroups.dbf", "rb")) as sf: + with shapefile.Reader( + shp=open("shapefiles/blockgroups.shp", "rb"), + dbf=open("shapefiles/blockgroups.dbf", "rb"), + ) as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_filelike_shp_only(): @@ -573,7 +836,7 @@ def test_reader_filelike_shp_only(): with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb")) as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 def test_reader_shapefile_delayed_load(): @@ -584,7 +847,7 @@ def test_reader_shapefile_delayed_load(): with shapefile.Reader() as sf: # assert that data request raises exception, since no file has been provided yet with pytest.raises(shapefile.ShapefileException): - sf.shape(0) + sf.shape(0) # assert that works after loading file manually sf.load("shapefiles/blockgroups") assert len(sf) == 663 @@ -603,7 +866,7 @@ def test_records_match_shapes(): def test_record_attributes(fields=None): """ - Assert that record retrieves all relevant values and can + Assert that record retrieves all relevant values and can be accessed as attributes and dictionary items. 
""" # note @@ -620,7 +883,9 @@ def test_record_attributes(fields=None): else: # default all fields record = full_record - fields = [field[0] for field in sf.fields[1:]] # fieldnames, sans del flag + fields = [ + field[0] for field in sf.fields[1:] + ] # fieldnames, sans del flag # check correct length assert len(record) == len(set(fields)) # check record values (should be in same order as shapefile fields) @@ -628,26 +893,28 @@ def test_record_attributes(fields=None): for field in sf.fields: field_name = field[0] if field_name in fields: - assert record[i] == record[field_name] == getattr(record, field_name) + assert ( + record[i] == record[field_name] == getattr(record, field_name) + ) i += 1 def test_record_subfields(): """ - Assert that reader correctly retrieves only a subset + Assert that reader correctly retrieves only a subset of fields when specified. """ - fields = ["AREA","POP1990","MALES","FEMALES","MOBILEHOME"] + fields = ["AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"] test_record_attributes(fields=fields) def test_record_subfields_unordered(): """ - Assert that reader correctly retrieves only a subset - of fields when specified, given in random order but - retrieved in the order of the shapefile fields. + Assert that reader correctly retrieves only a subset + of fields when specified, given in random order but + retrieved in the order of the shapefile fields. """ - fields = sorted(["AREA","POP1990","MALES","FEMALES","MOBILEHOME"]) + fields = sorted(["AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"]) test_record_attributes(fields=fields) @@ -655,7 +922,7 @@ def test_record_subfields_delflag_notvalid(): """ Assert that reader does not consider DeletionFlag as a valid field name. 
""" - fields = ["DeletionFlag","AREA","POP1990","MALES","FEMALES","MOBILEHOME"] + fields = ["DeletionFlag", "AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"] with pytest.raises(ValueError): test_record_attributes(fields=fields) @@ -663,9 +930,9 @@ def test_record_subfields_delflag_notvalid(): def test_record_subfields_duplicates(): """ Assert that reader correctly retrieves only a subset - of fields when specified, handling duplicate input fields. + of fields when specified, handling duplicate input fields. """ - fields = ["AREA","AREA","AREA","MALES","MALES","MOBILEHOME"] + fields = ["AREA", "AREA", "AREA", "MALES", "MALES", "MOBILEHOME"] test_record_attributes(fields=fields) # check that only 3 values with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -676,7 +943,7 @@ def test_record_subfields_duplicates(): def test_record_subfields_empty(): """ Assert that reader does not retrieve any fields when given - an empty list. + an empty list. """ fields = [] test_record_attributes(fields=fields) @@ -710,16 +977,61 @@ def test_record_oid(): record = sf.record(i) assert record.oid == i - for i,record in enumerate(sf.records()): + for i, record in enumerate(sf.records()): assert record.oid == i - for i,record in enumerate(sf.iterRecords()): + for i, record in enumerate(sf.iterRecords()): assert record.oid == i - for i,shaperec in enumerate(sf.iterShapeRecords()): + for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.record.oid == i +def test_iterRecords_start_stop(): + """ + Assert that Reader.iterRecords(start, stop) + returns the correct records, as if searched for + by index with Reader.record + """ + + with shapefile.Reader("shapefiles/blockgroups") as sf: + N = len(sf) + + # Arbitrary selection of record indices + # (there are 663 records in blockgroups.dbf). 
+ for i in [ + 0, + 1, + 2, + 3, + 5, + 11, + 17, + 33, + 51, + 103, + 170, + 234, + 435, + 543, + N - 3, + N - 2, + N - 1, + ]: + for record in sf.iterRecords(start=i): + assert record == sf.record(record.oid) + + for record in sf.iterRecords(stop=i): + assert record == sf.record(record.oid) + + for stop in range(i, len(sf)): + # test negative indexing from end, as well as + # positive values of stop, and its default + for stop_arg in (stop, stop - len(sf)): + for record in sf.iterRecords(start=i, stop=stop_arg): + assert record == sf.record(record.oid) + + def test_shape_oid(): """ Assert that the shape's oid attribute returns @@ -730,13 +1042,13 @@ def test_shape_oid(): shape = sf.shape(i) assert shape.oid == i - for i,shape in enumerate(sf.shapes()): + for i, shape in enumerate(sf.shapes()): assert shape.oid == i - for i,shape in enumerate(sf.iterShapes()): + for i, shape in enumerate(sf.iterShapes()): assert shape.oid == i - for i,shaperec in enumerate(sf.iterShapeRecords()): + for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.shape.oid == i @@ -746,76 +1058,77 @@ def test_shape_oid_no_shx(): its index in the shapefile, when shx file is missing. 
""" basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - dbf = open(basename + ".dbf", 'rb') - with shapefile.Reader(shp=shp, dbf=dbf) as sf, \ - shapefile.Reader(basename) as sf_expected: - for i in range(len(sf)): - shape = sf.shape(i) - assert shape.oid == i - shape_expected = sf_expected.shape(i) - assert shape.__geo_interface__ == shape_expected.__geo_interface__ - - for i,shape in enumerate(sf.shapes()): - assert shape.oid == i - shape_expected = sf_expected.shape(i) - assert shape.__geo_interface__ == shape_expected.__geo_interface__ - - for i,shape in enumerate(sf.iterShapes()): - assert shape.oid == i - shape_expected = sf_expected.shape(i) - assert shape.__geo_interface__ == shape_expected.__geo_interface__ - - for i,shaperec in enumerate(sf.iterShapeRecords()): - assert shaperec.shape.oid == i - shape_expected = sf_expected.shape(i) - assert shaperec.shape.__geo_interface__ == shape_expected.__geo_interface__ + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") + with shapefile.Reader(shp=shp, dbf=dbf) as sf: + with shapefile.Reader(basename) as sf_expected: + for i in range(len(sf)): + shape = sf.shape(i) + assert shape.oid == i + shape_expected = sf_expected.shape(i) + assert shape.__geo_interface__ == shape_expected.__geo_interface__ + + for i, shape in enumerate(sf.shapes()): + assert shape.oid == i + shape_expected = sf_expected.shape(i) + assert shape.__geo_interface__ == shape_expected.__geo_interface__ + + for i, shape in enumerate(sf.iterShapes()): + assert shape.oid == i + shape_expected = sf_expected.shape(i) + assert shape.__geo_interface__ == shape_expected.__geo_interface__ + + for i, shaperec in enumerate(sf.iterShapeRecords()): + assert shaperec.shape.oid == i + shape_expected = sf_expected.shape(i) + assert ( + shaperec.shape.__geo_interface__ == shape_expected.__geo_interface__ + ) def test_reader_offsets(): """ - Assert that reader will not read the shx offsets unless necessary, - i.e. 
requesting a shape index. + Assert that reader will not read the shx offsets unless necessary, + i.e. requesting a shape index. """ basename = "shapefiles/blockgroups" with shapefile.Reader(basename) as sf: # shx offsets should not be read during loading assert not sf._offsets # reading a shape index should trigger reading offsets from shx file - shape = sf.shape(3) + sf.shape(3) assert len(sf._offsets) == len(sf.shapes()) def test_reader_offsets_no_shx(): """ - Assert that reading a shapefile without a shx file will not build - the offsets unless necessary, i.e. reading all the shapes. + Assert that reading a shapefile without a shx file will not build + the offsets unless necessary, i.e. reading all the shapes. """ basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - dbf = open(basename + ".dbf", 'rb') + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") with shapefile.Reader(shp=shp, dbf=dbf) as sf: # offsets should not be built during loading assert not sf._offsets # reading a shape index should iterate to the shape # but the list of offsets should remain empty - shape = sf.shape(3) + sf.shape(3) assert not sf._offsets # reading all the shapes should build the list of offsets shapes = sf.shapes() assert len(sf._offsets) == len(shapes) - def test_reader_numshapes(): """ Assert that reader reads the numShapes attribute from the - shx file header during loading. + shx file header during loading. """ basename = "shapefiles/blockgroups" with shapefile.Reader(basename) as sf: # numShapes should be set during loading - assert sf.numShapes != None + assert sf.numShapes is not None # numShapes should equal the number of shapes assert sf.numShapes == len(sf.shapes()) @@ -827,11 +1140,11 @@ def test_reader_numshapes_no_shx(): reading all the shapes will set the numShapes attribute. 
""" basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - dbf = open(basename + ".dbf", 'rb') + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") with shapefile.Reader(shp=shp, dbf=dbf) as sf: # numShapes should be unknown due to missing shx file - assert sf.numShapes == None + assert sf.numShapes is None # numShapes should be set after reading all the shapes shapes = sf.shapes() assert sf.numShapes == len(shapes) @@ -839,8 +1152,8 @@ def test_reader_numshapes_no_shx(): def test_reader_len(): """ - Assert that calling len() on reader is equal to length of - all shapes and records. + Assert that calling len() on reader is equal to length of + all shapes and records. """ basename = "shapefiles/blockgroups" with shapefile.Reader(basename) as sf: @@ -850,7 +1163,7 @@ def test_reader_len(): def test_reader_len_not_loaded(): """ Assert that calling len() on reader that hasn't loaded a shapefile - yet is equal to 0. + yet is equal to 0. """ with shapefile.Reader() as sf: assert len(sf) == 0 @@ -859,10 +1172,10 @@ def test_reader_len_not_loaded(): def test_reader_len_dbf_only(): """ Assert that calling len() on reader when reading a dbf file only, - is equal to length of all records. + is equal to length of all records. """ basename = "shapefiles/blockgroups" - dbf = open(basename + ".dbf", 'rb') + dbf = open(basename + ".dbf", "rb") with shapefile.Reader(dbf=dbf) as sf: assert len(sf) == len(sf.records()) @@ -870,11 +1183,11 @@ def test_reader_len_dbf_only(): def test_reader_len_no_dbf(): """ Assert that calling len() on reader when dbf file is missing, - is equal to length of all shapes. + is equal to length of all shapes. 
""" basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - shx = open(basename + ".shx", 'rb') + shp = open(basename + ".shp", "rb") + shx = open(basename + ".shx", "rb") with shapefile.Reader(shp=shp, shx=shx) as sf: assert len(sf) == len(sf.shapes()) @@ -882,10 +1195,10 @@ def test_reader_len_no_dbf(): def test_reader_len_no_dbf_shx(): """ Assert that calling len() on reader when dbf and shx file is missing, - is equal to length of all shapes. + is equal to length of all shapes. """ basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') + shp = open(basename + ".shp", "rb") with shapefile.Reader(shp=shp) as sf: assert len(sf) == len(sf.shapes()) @@ -893,7 +1206,7 @@ def test_reader_len_no_dbf_shx(): def test_reader_corrupt_files(): """ Assert that reader is able to handle corrupt files by - strictly going off the header information. + strictly going off the header information. """ basename = "shapefiles/test/corrupt_too_long" @@ -903,10 +1216,10 @@ def test_reader_corrupt_files(): # add 10 line geoms for _ in range(10): w.record("value") - w.line([[(1,1),(1,2),(2,2)]]) + w.line([[(1, 1), (1, 2), (2, 2)]]) # add junk byte data to end of dbf and shp files - w.dbf.write(b'12345') - w.shp.write(b'12345') + w.dbf.write(b"12345") + w.shp.write(b"12345") # read the corrupt shapefile and assert that it reads correctly with shapefile.Reader(basename) as sf: @@ -931,7 +1244,7 @@ def test_reader_corrupt_files(): def test_bboxfilter_shape(): """ Assert that applying the bbox filter to shape() correctly ignores the shape - if it falls outside, and returns it if inside. + if it falls outside, and returns it if inside. """ inside = [-122.4, 37.8, -122.35, 37.82] outside = list(inside) @@ -945,7 +1258,7 @@ def test_bboxfilter_shape(): def test_bboxfilter_shapes(): """ Assert that applying the bbox filter to shapes() correctly ignores shapes - that fall outside, and returns those that fall inside. 
+ that fall outside, and returns those that fall inside. """ bbox = [-122.4, 37.8, -122.35, 37.82] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -959,7 +1272,7 @@ def test_bboxfilter_shapes(): # compare assert len(shapes) == len(manual) # check that they line up - for shape,man in zip(shapes,manual): + for shape, man in zip(shapes, manual): assert shape.oid == man.oid assert shape.__geo_interface__ == man.__geo_interface__ @@ -967,7 +1280,7 @@ def test_bboxfilter_shapes(): def test_bboxfilter_shapes_outside(): """ Assert that applying the bbox filter to shapes() correctly returns - no shapes when the bbox is outside the entire shapefile. + no shapes when the bbox is outside the entire shapefile. """ bbox = [-180, 89, -179, 90] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -978,7 +1291,7 @@ def test_bboxfilter_shapes_outside(): def test_bboxfilter_itershapes(): """ Assert that applying the bbox filter to iterShapes() correctly ignores shapes - that fall outside, and returns those that fall inside. + that fall outside, and returns those that fall inside. """ bbox = [-122.4, 37.8, -122.35, 37.82] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -992,7 +1305,7 @@ def test_bboxfilter_itershapes(): # compare assert len(shapes) == len(manual) # check that they line up - for shape,man in zip(shapes,manual): + for shape, man in zip(shapes, manual): assert shape.oid == man.oid assert shape.__geo_interface__ == man.__geo_interface__ @@ -1000,7 +1313,7 @@ def test_bboxfilter_itershapes(): def test_bboxfilter_shaperecord(): """ Assert that applying the bbox filter to shapeRecord() correctly ignores the shape - if it falls outside, and returns it if inside. + if it falls outside, and returns it if inside. 
""" inside = [-122.4, 37.8, -122.35, 37.82] outside = list(inside) @@ -1018,7 +1331,7 @@ def test_bboxfilter_shaperecord(): def test_bboxfilter_shaperecords(): """ Assert that applying the bbox filter to shapeRecords() correctly ignores shapes - that fall outside, and returns those that fall inside. + that fall outside, and returns those that fall inside. """ bbox = [-122.4, 37.8, -122.35, 37.82] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -1032,7 +1345,7 @@ def test_bboxfilter_shaperecords(): # compare assert len(shaperecs) == len(manual) # check that they line up - for shaperec,man in zip(shaperecs,manual): + for shaperec, man in zip(shaperecs, manual): # oids assert shaperec.shape.oid == shaperec.record.oid # same shape as manual @@ -1046,7 +1359,7 @@ def test_bboxfilter_shaperecords(): def test_bboxfilter_itershaperecords(): """ Assert that applying the bbox filter to iterShapeRecords() correctly ignores shapes - that fall outside, and returns those that fall inside. + that fall outside, and returns those that fall inside. 
""" bbox = [-122.4, 37.8, -122.35, 37.82] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -1060,7 +1373,7 @@ def test_bboxfilter_itershaperecords(): # compare assert len(shaperecs) == len(manual) # check that they line up - for shaperec,man in zip(shaperecs,manual): + for shaperec, man in zip(shaperecs, manual): # oids assert shaperec.shape.oid == shaperec.record.oid # same shape as manual @@ -1101,7 +1414,7 @@ def test_shaperecord_shape(): shaperec = sf.shapeRecord(3) shape = shaperec.shape point = shape.points[0] - assert len(point) is 2 + assert len(point) == 2 def test_shaperecord_record(): @@ -1113,7 +1426,7 @@ def test_shaperecord_record(): shaperec = sf.shapeRecord(3) record = shaperec.record - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_write_field_name_limit(tmpdir): @@ -1122,11 +1435,11 @@ def test_write_field_name_limit(tmpdir): """ filename = tmpdir.join("test.shp").strpath with shapefile.Writer(filename) as writer: - writer.field('a'*5, 'C') # many under length limit - writer.field('a'*9, 'C') # 1 under length limit - writer.field('a'*10, 'C') # at length limit - writer.field('a'*11, 'C') # 1 over length limit - writer.field('a'*20, 'C') # many over limit + writer.field("a" * 5, "C") # many under length limit + writer.field("a" * 9, "C") # 1 under length limit + writer.field("a" * 10, "C") # at length limit + writer.field("a" * 11, "C") # 1 over length limit + writer.field("a" * 20, "C") # many over limit with shapefile.Reader(filename) as reader: fields = reader.fields[1:] @@ -1144,27 +1457,30 @@ def test_write_shp_only(tmpdir): creates just a shp file. 
""" filename = tmpdir.join("test").strpath - with shapefile.Writer(shp=filename+'.shp') as writer: + with shapefile.Writer(shp=filename + ".shp") as writer: writer.point(1, 1) assert writer.shp and not writer.shx and not writer.dbf assert writer.shpNum == 1 assert len(writer) == 1 - assert writer.shp.closed == True + assert writer.shp.closed is True # assert test.shp exists - assert os.path.exists(filename+'.shp') - + assert os.path.exists(filename + ".shp") + # test that can read shapes - with shapefile.Reader(shp=filename+'.shp') as reader: + with shapefile.Reader(shp=filename + ".shp") as reader: assert reader.shp and not reader.shx and not reader.dbf - assert (reader.numRecords, reader.numShapes) == (None, None) # numShapes is unknown in the absence of shx file + assert (reader.numRecords, reader.numShapes) == ( + None, + None, + ) # numShapes is unknown in the absence of shx file assert len(reader.shapes()) == 1 # assert test.shx does not exist - assert not os.path.exists(filename+'.shx') + assert not os.path.exists(filename + ".shx") # assert test.dbf does not exist - assert not os.path.exists(filename+'.dbf') + assert not os.path.exists(filename + ".dbf") def test_write_shp_shx_only(tmpdir): @@ -1174,29 +1490,29 @@ def test_write_shp_shx_only(tmpdir): creates just a shp and shx file. 
""" filename = tmpdir.join("test").strpath - with shapefile.Writer(shp=filename+'.shp', shx=filename+'.shx') as writer: + with shapefile.Writer(shp=filename + ".shp", shx=filename + ".shx") as writer: writer.point(1, 1) assert writer.shp and writer.shx and not writer.dbf assert writer.shpNum == 1 assert len(writer) == 1 - assert writer.shp.closed == writer.shx.closed == True + assert writer.shp.closed is writer.shx.closed is True # assert test.shp exists - assert os.path.exists(filename+'.shp') + assert os.path.exists(filename + ".shp") # assert test.shx exists - assert os.path.exists(filename+'.shx') + assert os.path.exists(filename + ".shx") # test that can read shapes and offsets - with shapefile.Reader(shp=filename+'.shp', shx=filename+'.shx') as reader: + with shapefile.Reader(shp=filename + ".shp", shx=filename + ".shx") as reader: assert reader.shp and reader.shx and not reader.dbf assert (reader.numRecords, reader.numShapes) == (None, 1) - reader.shape(0) # trigger reading of shx offsets + reader.shape(0) # trigger reading of shx offsets assert len(reader._offsets) == 1 assert len(reader.shapes()) == 1 # assert test.dbf does not exist - assert not os.path.exists(filename+'.dbf') + assert not os.path.exists(filename + ".dbf") def test_write_shp_dbf_only(tmpdir): @@ -1206,30 +1522,33 @@ def test_write_shp_dbf_only(tmpdir): creates just a shp and dbf file. 
""" filename = tmpdir.join("test").strpath - with shapefile.Writer(shp=filename+'.shp', dbf=filename+'.dbf') as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + with shapefile.Writer(shp=filename + ".shp", dbf=filename + ".dbf") as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") writer.point(1, 1) assert writer.shp and not writer.shx and writer.dbf assert writer.shpNum == writer.recNum == 1 assert len(writer) == 1 - assert writer.shp.closed == writer.dbf.closed == True + assert writer.shp.closed is writer.dbf.closed is True # assert test.shp exists - assert os.path.exists(filename+'.shp') + assert os.path.exists(filename + ".shp") # assert test.dbf exists - assert os.path.exists(filename+'.dbf') - + assert os.path.exists(filename + ".dbf") + # test that can read records and shapes - with shapefile.Reader(shp=filename+'.shp', dbf=filename+'.dbf') as reader: + with shapefile.Reader(shp=filename + ".shp", dbf=filename + ".dbf") as reader: assert reader.shp and not reader.shx and reader.dbf - assert (reader.numRecords, reader.numShapes) == (1, None) # numShapes is unknown in the absence of shx file + assert (reader.numRecords, reader.numShapes) == ( + 1, + None, + ) # numShapes is unknown in the absence of shx file assert len(reader.records()) == 1 assert len(reader.shapes()) == 1 # assert test.shx does not exist - assert not os.path.exists(filename+'.shx') + assert not os.path.exists(filename + ".shx") def test_write_dbf_only(tmpdir): @@ -1239,28 +1558,28 @@ def test_write_dbf_only(tmpdir): creates just a dbf file. 
""" filename = tmpdir.join("test").strpath - with shapefile.Writer(dbf=filename+'.dbf') as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + with shapefile.Writer(dbf=filename + ".dbf") as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") assert not writer.shp and not writer.shx and writer.dbf assert writer.recNum == 1 assert len(writer) == 1 - assert writer.dbf.closed == True + assert writer.dbf.closed is True # assert test.dbf exists - assert os.path.exists(filename+'.dbf') + assert os.path.exists(filename + ".dbf") # test that can read records - with shapefile.Reader(dbf=filename+'.dbf') as reader: + with shapefile.Reader(dbf=filename + ".dbf") as reader: assert not writer.shp and not writer.shx and writer.dbf assert (reader.numRecords, reader.numShapes) == (1, None) assert len(reader.records()) == 1 # assert test.shp does not exist - assert not os.path.exists(filename+'.shp') + assert not os.path.exists(filename + ".shp") # assert test.shx does not exist - assert not os.path.exists(filename+'.shx') + assert not os.path.exists(filename + ".shx") def test_write_default_shp_shx_dbf(tmpdir): @@ -1271,8 +1590,8 @@ def test_write_default_shp_shx_dbf(tmpdir): """ filename = tmpdir.join("test").strpath with shapefile.Writer(filename) as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") writer.null() # assert shp, shx, dbf files exist @@ -1289,8 +1608,8 @@ def test_write_pathlike(tmpdir): filename = tmpdir.join("test") assert not isinstance(filename, str) with shapefile.Writer(filename) as writer: - writer.field('field1', 'C') - writer.record('value') + writer.field("field1", "C") + writer.record("value") writer.null() assert (filename + ".shp").ensure() assert (filename + ".shx").ensure() @@ -1301,14 +1620,14 @@ def 
test_write_filelike(tmpdir): """ Assert that file-like objects are written correctly. """ - shp = open(tmpdir.join("test.shp").strpath, mode='wb+') - shx = open(tmpdir.join("test.shx").strpath, mode='wb+') - dbf = open(tmpdir.join("test.dbf").strpath, mode='wb+') + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") with shapefile.Writer(shx=shx, dbf=dbf, shp=shp) as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") writer.null() - + # test that filelike objects were written correctly with shapefile.Reader(shp=shp, shx=shx, dbf=dbf) as reader: assert len(reader) == 1 @@ -1321,9 +1640,9 @@ def test_write_close_path(tmpdir): closes the shp, shx, and dbf files on exit, if given paths. """ - sf = shapefile.Writer(tmpdir.join('test')) - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + sf = shapefile.Writer(tmpdir.join("test")) + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() sf.close() @@ -1332,7 +1651,7 @@ def test_write_close_path(tmpdir): assert sf.shx.closed is True # test that opens and reads correctly after - with shapefile.Reader(tmpdir.join('test')) as reader: + with shapefile.Reader(tmpdir.join("test")) as reader: assert len(reader) == 1 assert reader.shape(0).shapeType == shapefile.NULL @@ -1343,12 +1662,12 @@ def test_write_close_filelike(tmpdir): leaves the shp, shx, and dbf files open on exit, if given filelike objects. 
""" - shp = open(tmpdir.join("test.shp").strpath, mode='wb+') - shx = open(tmpdir.join("test.shx").strpath, mode='wb+') - dbf = open(tmpdir.join("test.dbf").strpath, mode='wb+') + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") sf = shapefile.Writer(shx=shx, dbf=dbf, shp=shp) - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() sf.close() @@ -1368,9 +1687,9 @@ def test_write_context_path(tmpdir): closes the shp, shx, and dbf files on exit, if given paths. """ - with shapefile.Writer(tmpdir.join('test')) as sf: - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + with shapefile.Writer(tmpdir.join("test")) as sf: + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() assert sf.shp.closed is True @@ -1378,7 +1697,7 @@ def test_write_context_path(tmpdir): assert sf.shx.closed is True # test that opens and reads correctly after - with shapefile.Reader(tmpdir.join('test')) as reader: + with shapefile.Reader(tmpdir.join("test")) as reader: assert len(reader) == 1 assert reader.shape(0).shapeType == shapefile.NULL @@ -1389,12 +1708,12 @@ def test_write_context_filelike(tmpdir): leaves the shp, shx, and dbf files open on exit, if given filelike objects. 
""" - shp = open(tmpdir.join("test.shp").strpath, mode='wb+') - shx = open(tmpdir.join("test.shx").strpath, mode='wb+') - dbf = open(tmpdir.join("test.dbf").strpath, mode='wb+') + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") with shapefile.Writer(shx=shx, dbf=dbf, shp=shp) as sf: - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() assert sf.shp.closed is False @@ -1416,7 +1735,7 @@ def test_write_shapefile_extension_ignored(tmpdir): ext = ".abc" filename = tmpdir.join(base + ext).strpath with shapefile.Writer(filename) as writer: - writer.field('field1', 'C') # required to create a valid dbf file + writer.field("field1", "C") # required to create a valid dbf file # assert shp, shx, dbf files exist basepath = tmpdir.join(base).strpath @@ -1431,18 +1750,18 @@ def test_write_shapefile_extension_ignored(tmpdir): def test_write_record(tmpdir): """ Test that .record() correctly writes a record using either a list of *args - or a dict of **kwargs. + or a dict of **kwargs. """ filename = tmpdir.join("test.shp").strpath with shapefile.Writer(filename) as writer: writer.autoBalance = True - writer.field('one', 'C') - writer.field('two', 'C') - writer.field('three', 'C') - writer.field('four', 'C') - - values = ['one','two','three','four'] + writer.field("one", "C") + writer.field("two", "C") + writer.field("three", "C") + writer.field("four", "C") + + values = ["one", "two", "three", "four"] writer.record(*values) writer.record(*values) @@ -1458,18 +1777,18 @@ def test_write_record(tmpdir): def test_write_partial_record(tmpdir): """ Test that .record() correctly writes a partial record (given only some of the values) - using either a list of *args or a dict of **kwargs. Should fill in the gaps. 
+ using either a list of *args or a dict of **kwargs. Should fill in the gaps. """ filename = tmpdir.join("test.shp").strpath with shapefile.Writer(filename) as writer: writer.autoBalance = True - - writer.field('one', 'C') - writer.field('two', 'C') - writer.field('three', 'C') - writer.field('four', 'C') - - values = ['one','two'] + + writer.field("one", "C") + writer.field("two", "C") + writer.field("three", "C") + writer.field("four", "C") + + values = ["one", "two"] writer.record(*values) writer.record(*values) @@ -1479,7 +1798,7 @@ def test_write_partial_record(tmpdir): with shapefile.Reader(filename) as reader: expected = list(values) - expected.extend(['','']) + expected.extend(["", ""]) for record in reader.iterRecords(): assert record == expected @@ -1492,13 +1811,13 @@ def test_write_geojson(tmpdir): """ filename = tmpdir.join("test").strpath with shapefile.Writer(filename) as w: - w.field('TEXT', 'C') - w.field('NUMBER', 'N') - w.field('DATE', 'D') - w.record('text', 123, datetime.date(1898,1,30)) - w.record('text', 123, [1998,1,30]) - w.record('text', 123, '19980130') - w.record('text', 123, '-9999999') # faulty date + w.field("TEXT", "C") + w.field("NUMBER", "N") + w.field("DATE", "D") + w.record("text", 123, datetime.date(1898, 1, 30)) + w.record("text", 123, [1998, 1, 30]) + w.record("text", 123, "19980130") + w.record("text", 123, "-9999999") # faulty date w.record(None, None, None) w.null() w.null() @@ -1513,17 +1832,19 @@ def test_write_geojson(tmpdir): assert json.dumps(r.__geo_interface__) -shape_types = [k for k in shapefile.SHAPETYPE_LOOKUP.keys() if k != 31] # exclude multipatch +shape_types = [ + k for k in shapefile.SHAPETYPE_LOOKUP.keys() if k != 31 +] # exclude multipatch @pytest.mark.parametrize("shape_type", shape_types) def test_write_empty_shapefile(tmpdir, shape_type): """ - Assert that can write an empty shapefile, for all different shape types. + Assert that can write an empty shapefile, for all different shape types. 
""" filename = tmpdir.join("test").strpath with shapefile.Writer(filename, shapeType=shape_type) as w: - w.field('field1', 'C') # required to create a valid dbf file + w.field("field1", "C") # required to create a valid dbf file with shapefile.Reader(filename) as r: # test correct shape type