diff --git a/.github/workflows/speed_test.yml b/.github/workflows/speed_test.yml new file mode 100644 index 0000000..3dfd504 --- /dev/null +++ b/.github/workflows/speed_test.yml @@ -0,0 +1,56 @@ + +name: Run the speed tests + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + + run_speed_tests: + strategy: + fail-fast: false + matrix: + python-version: [ + "3.9", + "3.13", + ] + os: [ + "windows-latest", + "ubuntu-24.04", + ] + + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + + - uses: actions/checkout@v4 + with: + path: ./Pyshp + + - name: Install PyShp + test deps + shell: bash + working-directory: ./Pyshp + run: | + python -m pip install -r requirements.test.txt + python -m pip install -e . + + + - name: Checkout shapefiles and zip file artefacts repo + uses: actions/checkout@v4 + with: + repository: JamesParrott/PyShp_test_shapefile + path: ./PyShp_test_shapefile + + - name: Run Speed tests. + env: + PYSHP_TEST_REPO: ./PyShp_test_shapefile + run: python Pyshp/run_benchmarks.py + + + diff --git a/README.md b/README.md index c55e204..ad8ffca 100644 --- a/README.md +++ b/README.md @@ -95,10 +95,13 @@ part of your geospatial project. # Version Changes -## 2.4.0 +## 2.4.1 + +### Improvements: +- Speed up writing shapefiles by up to ~39%, by combining for loops of calls to f.write(pack(...)) into single calls. ### Breaking Change. Support for Python 2 and Pythons <= 3.8 to be dropped. -- PyShp 2.4.0 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8. +- PyShp 2.4.1 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8. These CPython versions have reached [end of life](https://devguide.python.org/versions/#versions). - Future development will focus on PyShp v3.0.0 onwards (currently intended to supporting Pythons >= 3.9). 
- This will not break any projects, as pip and other package managers should not install PyShp 3.0.0 @@ -107,6 +110,7 @@ bug fixes and features. - If this negatively impacts your project, all feedback about this decision is welcome on our [the discussion page](https://github.com/GeospatialPython/pyshp/discussions/290). +## 2.4.0 ### New Features: - Reader.iterRecords now allows start and stop to be specified, to lookup smaller ranges of records. diff --git a/changelog.txt b/changelog.txt index 533d704..79a367f 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,3 +1,32 @@ +VERSION 2.4.1 + +2025-07-30 + Improvements: + * Speed up writing shapefiles by up to ~39%, by combining for loops of calls to f.write(pack(...)) into single calls. + + Forthcoming Breaking Change. Support for Python 2 and Pythons <= 3.8 to be dropped. + * PyShp 2.4.1 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8. + These CPython versions have reached [end of life](https://devguide.python.org/versions/#versions). + * Future development will focus on PyShp v3.0.0 onwards (currently intended to support Pythons >= 3.9). + * This will not break any projects, as pip and other package managers should not install PyShp 3.0.0 + (after its release) in unsupported Pythons. But we no longer promise such projects will get PyShp's latest + bug fixes and features. + * If this negatively impacts your project, all feedback about this decision is welcome + on [our discussion page](https://github.com/GeospatialPython/pyshp/discussions/290). + +VERSION 2.4.0 + +2025-07-21 + + + New Features: + * Reader.iterRecords now allows start and stop to be specified, to lookup smaller ranges of records. + * Equality comparisons between Records now also require the fields to be the same (and in the same order). 
+ + Development: + * Code quality tools (Ruff format) run on PyShp + * Network, non-network, or all doctests selectable via command line args + * Network tests made runnable on localhost. VERSION 2.3.1 diff --git a/run_benchmarks.py b/run_benchmarks.py new file mode 100644 index 0000000..edc2119 --- /dev/null +++ b/run_benchmarks.py @@ -0,0 +1,123 @@ +# Based on Taneli Hukkinen's https://github.com/hukkin/tomli-w/blob/master/benchmark/run.py + +from __future__ import annotations + +import collections +import functools +import os +import timeit +from collections.abc import Callable +from pathlib import Path +from tempfile import TemporaryFile as TempF +from typing import Union + +import shapefile + +# For shapefiles from https://github.com/JamesParrott/PyShp_test_shapefile +DEFAULT_PYSHP_TEST_REPO = ( + rf"{os.getenv('USERPROFILE')}\Coding\repos\PyShp_test_shapefile" +) +PYSHP_TEST_REPO = Path(os.getenv("PYSHP_TEST_REPO", DEFAULT_PYSHP_TEST_REPO)) +REPO_ROOT = Path(__file__).parent + + +blockgroups_file = REPO_ROOT / "shapefiles" / "blockgroups.shp" +edit_file = REPO_ROOT / "shapefiles" / "test" / "edit.shp" +merge_file = REPO_ROOT / "shapefiles" / "test" / "merge.shp" +states_provinces_file = PYSHP_TEST_REPO / "ne_10m_admin_1_states_provinces.shp" +tiny_countries_file = PYSHP_TEST_REPO / "ne_110m_admin_0_tiny_countries.shp" +gis_osm_natural_file = PYSHP_TEST_REPO / "gis_osm_natural_a_free_1.zip" + + +def benchmark( + name: str, + run_count: int, + func: Callable, + col_widths: tuple, + compare_to: float | None = None, +) -> float: + placeholder = "Running..." 
+ print(f"{name:>{col_widths[0]}} | {placeholder}", end="", flush=True) + time_taken = timeit.timeit(func, number=run_count) + print("\b" * len(placeholder), end="") + time_suffix = " s" + print(f"{time_taken:{col_widths[1]-len(time_suffix)}.3g}{time_suffix}", end="") + print() + return time_taken + + +fields = {} +shapeRecords = collections.defaultdict(list) + + +def open_shapefile_with_PyShp(target: Union[str, os.PathLike]): + with shapefile.Reader(target) as r: + fields[target] = r.fields + for shapeRecord in r.iterShapeRecords(): + shapeRecords[target].append(shapeRecord) + + +def write_shapefile_with_PyShp(target: Union[str, os.PathLike]): + with TempF("wb") as shp, TempF("wb") as dbf, TempF("wb") as shx: + with shapefile.Writer(shp=shp, dbf=dbf, shx=shx) as w: # type: ignore [arg-type] + for field_info_tuple in fields[target]: + w.field(*field_info_tuple) + for shapeRecord in shapeRecords[target]: + w.shape(shapeRecord.shape) + w.record(*shapeRecord.record) + + +SHAPEFILES = { + "Blockgroups": blockgroups_file, + "Edit": edit_file, + "Merge": merge_file, + "States_35MB": states_provinces_file, + "Tiny Countries": tiny_countries_file, + "GIS_OSM_zip_10MB": gis_osm_natural_file, +} + + +# Load files to avoid one off delays that only affect first disk seek +for file_path in SHAPEFILES.values(): + file_path.read_bytes() + +reader_benchmarks = [ + functools.partial( + benchmark, + name=f"Read {test_name}", + func=functools.partial(open_shapefile_with_PyShp, target=target), + ) + for test_name, target in SHAPEFILES.items() +] + +# Require fields and shapeRecords to first have been populated +# from data from previouly running the reader_benchmarks +writer_benchmarks = [ + functools.partial( + benchmark, + name=f"Write {test_name}", + func=functools.partial(write_shapefile_with_PyShp, target=target), + ) + for test_name, target in SHAPEFILES.items() +] + + +def run(run_count: int, benchmarks: list[Callable[[], None]]) -> None: + col_widths = (22, 10) + col_head = 
("parser", "exec time", "performance (more is better)") + print(f"Running benchmarks {run_count} times:") + print("-" * col_widths[0] + "---" + "-" * col_widths[1]) + print(f"{col_head[0]:>{col_widths[0]}} | {col_head[1]:>{col_widths[1]}}") + print("-" * col_widths[0] + "-+-" + "-" * col_widths[1]) + for benchmark in benchmarks: + benchmark( # type: ignore [call-arg] + run_count=run_count, + col_widths=col_widths, + ) + + +if __name__ == "__main__": + print("Reader tests:") + run(1, reader_benchmarks) # type: ignore [arg-type] + print("\n\nWriter tests:") + run(1, writer_benchmarks) # type: ignore [arg-type] diff --git a/shapefile.py b/shapefile.py index 211fd48..62405b8 100644 --- a/shapefile.py +++ b/shapefile.py @@ -6,7 +6,7 @@ Compatible with Python versions 2.7-3.x """ -__version__ = "2.4.0" +__version__ = "2.4.1" import array import io @@ -2314,16 +2314,17 @@ def __shpRecord(self, s): f.write(pack(" 2 else 0)) for p in s.points] + zs = [p[2] if len(p) > 2 else 0 for p in s.points] + f.write(pack("<%sd" % len(zs), *zs)) except error: raise ShapefileException( "Failed to write elevation values for record %s. Expected floats." diff --git a/test_shapefile.py b/test_shapefile.py index 1b7182f..bf145ae 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -987,6 +987,7 @@ def test_record_oid(): assert shaperec.record.oid == i +@pytest.mark.slow def test_iterRecords_start_stop(): """ Assert that Reader.iterRecords(start, stop) @@ -999,36 +1000,31 @@ def test_iterRecords_start_stop(): # Arbitrary selection of record indices # (there are 663 records in blockgroups.dbf). 
- for i in [ + indices = [ 0, 1, 2, - 3, 5, 11, - 17, - 33, - 51, - 103, - 170, - 234, - 435, - 543, + 41, + 310, + 513, N - 3, - N - 2, N - 1, - ]: - for record in sf.iterRecords(start=i): + ] + for i, index in enumerate(indices): + for record in sf.iterRecords(start=index): assert record == sf.record(record.oid) - for record in sf.iterRecords(stop=i): + for record in sf.iterRecords(stop=index): assert record == sf.record(record.oid) - for stop in range(i, len(sf)): + for j in range(i + 1, len(indices)): + stop = indices[j] # test negative indexing from end, as well as # positive values of stop, and its default - for stop_arg in (stop, stop - len(sf)): - for record in sf.iterRecords(start=i, stop=stop_arg): + for stop_arg in (stop, stop - N): + for record in sf.iterRecords(start=index, stop=stop_arg): assert record == sf.record(record.oid)