From b65a2b76968c5fe7698c3f7ea0c429e81a5f870a Mon Sep 17 00:00:00 2001 From: Karim Bahgat Date: Fri, 29 Jul 2022 00:48:44 +0200 Subject: [PATCH 001/115] Fix missing version/date bump in readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 554753b2..042e11c4 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ The Python Shapefile Library (PyShp) reads and writes ESRI Shapefiles in pure Py - **Author**: [Joel Lawhead](https://github.com/GeospatialPython) - **Maintainers**: [Karim Bahgat](https://github.com/karimbahgat) -- **Version**: 2.3.0 -- **Date**: 30 April, 2022 +- **Version**: 2.3.1 +- **Date**: 28 July, 2022 - **License**: [MIT](https://github.com/GeospatialPython/pyshp/blob/master/LICENSE.TXT) ## Contents From 09545a17f63dbdce407678ef51e09ca3c9c01f39 Mon Sep 17 00:00:00 2001 From: bva-bme <62710558+bva-bme@users.noreply.github.com> Date: Tue, 8 Nov 2022 11:14:13 +0100 Subject: [PATCH 002/115] Added uppercase file extension compatibility for zip files --- shapefile.py | 200 +++++++++++++++++++++++++-------------------------- 1 file changed, 100 insertions(+), 100 deletions(-) diff --git a/shapefile.py b/shapefile.py index 04fb5ec0..19e53940 100644 --- a/shapefile.py +++ b/shapefile.py @@ -85,7 +85,7 @@ from urllib.parse import urlparse, urlunparse from urllib.error import HTTPError from urllib.request import urlopen, Request - + else: from itertools import izip @@ -97,7 +97,7 @@ # Helpers MISSING = [None,''] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. +NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. if PYTHON3: def b(v, encoding='utf-8', encodingErrors='strict'): @@ -207,7 +207,7 @@ def signed_area(coords, fast=False): def is_cw(coords): """Returns True if a polygon ring has clockwise orientation, determined - by a negatively signed area. + by a negatively signed area. """ area2 = signed_area(coords, fast=True) return area2 < 0 @@ -245,7 +245,7 @@ def ring_contains_point(coords, p): Adapted from code by Eric Haynes http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c - + Original description: Shoot a test ray along +X axis. The strategy, from MacMartin, is to compare vertex Y values to the testing point's Y and quickly discard @@ -258,11 +258,11 @@ def ring_contains_point(coords, p): yflag0 = ( vtx0[1] >= ty ) inside_flag = False - for vtx1 in coords[1:]: + for vtx1 in coords[1:]: yflag1 = ( vtx1[1] >= ty ) # check if endpoints straddle (are on opposite sides) of X axis # (i.e. the Y's differ); if so, +X ray could intersect this edge. - if yflag0 != yflag1: + if yflag0 != yflag1: xflag0 = ( vtx0[0] >= tx ) # check if endpoints are on same side of the Y axis (i.e. X's # are the same); if so, it's easy to test if edge hits or misses. @@ -287,7 +287,7 @@ def ring_sample(coords, ccw=False): finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. The orientation of the ring is assumed to be clockwise, unless ccw - (counter-clockwise) is set to True. + (counter-clockwise) is set to True. 
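A quick note on the orientation convention the hunks above rely on: clockwise rings are exteriors, counter-clockwise rings are holes, as decided by the sign of the ring's area. A minimal sketch, assuming the shapefile.py from this series is importable (the ring coordinates are invented):

    from shapefile import is_cw, signed_area

    # Walking up, right, down, left encloses the square clockwise,
    # which PyShp interprets as an exterior ring.
    ring = [(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)]

    print(signed_area(ring))             # -100.0: negative area means clockwise
    print(is_cw(ring))                   # True
    print(is_cw(list(reversed(ring))))   # False: the reversed ring would be a hole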
""" triplet = [] def itercoords(): @@ -296,12 +296,12 @@ def itercoords(): yield p # finally, yield the second coordinate to the end to allow checking the last triplet yield coords[1] - - for p in itercoords(): + + for p in itercoords(): # add point to triplet (but not if duplicate) if p not in triplet: triplet.append(p) - + # new triplet, try to get sample if len(triplet) == 3: # check that triplet does not form a straight line (not a triangle) @@ -322,7 +322,7 @@ def itercoords(): # failed to get sample point from this triplet # remove oldest triplet coord to allow iterating to next triplet triplet.pop(0) - + else: raise Exception('Unexpected error: Unable to find a ring sample point.') @@ -334,14 +334,14 @@ def ring_contains_ring(coords1, coords2): def organize_polygon_rings(rings, return_errors=None): '''Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior - ring, and one or more interior holes. If a return_errors dict is provided (optional), - any errors encountered will be added to it. + ring, and one or more interior holes. If a return_errors dict is provided (optional), + any errors encountered will be added to it. Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). Rings are determined as exteriors if they run in clockwise direction, or interior holes if they run in counter-clockwise direction. This method is used to construct GeoJSON (multi)polygons from the shapefile polygon shape type, which does not - explicitly store the structure of the polygons beyond exterior/interior ring orientation. + explicitly store the structure of the polygons beyond exterior/interior ring orientation. ''' # first iterate rings and classify as exterior or hole exteriors = [] @@ -355,7 +355,7 @@ def organize_polygon_rings(rings, return_errors=None): else: # ring is a hole holes.append(ring) - + # if only one exterior, then all holes belong to that exterior if len(exteriors) == 1: # exit early @@ -374,7 +374,7 @@ def organize_polygon_rings(rings, return_errors=None): poly = [ext] polys.append(poly) return polys - + # first determine each hole's candidate exteriors based on simple bbox contains test hole_exteriors = dict([(hole_i,[]) for hole_i in xrange(len(holes))]) exterior_bboxes = [ring_bbox(ring) for ring in exteriors] @@ -386,7 +386,7 @@ def organize_polygon_rings(rings, return_errors=None): # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test for hole_i,exterior_candidates in hole_exteriors.items(): - + if len(exterior_candidates) > 1: # get hole sample point ccw = not is_cw(holes[hole_i]) @@ -404,7 +404,7 @@ def organize_polygon_rings(rings, return_errors=None): # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole for hole_i,exterior_candidates in hole_exteriors.items(): - + if len(exterior_candidates) > 1: # exterior candidate with the smallest area is the hole's most immediate parent ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x], fast=True)))[0] @@ -463,17 +463,17 @@ def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid= geometry record then those shapes are called parts. Parts are designated by their starting index in geometry record's list of shapes. For MultiPatch geometry, partTypes designates - the patch type of each of the parts. + the patch type of each of the parts. 
""" self.shapeType = shapeType self.points = points or [] self.parts = parts or [] if partTypes: self.partTypes = partTypes - + # and a dict to silently record any errors encountered self._errors = {} - + # add oid if oid is not None: self.__oid = oid @@ -557,12 +557,12 @@ def __geo_interface__(self): rings.append(ring) # organize rings into list of polygons, where each polygon is defined as list of rings. - # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). + # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). polys = organize_polygon_rings(rings, self._errors) - + # if VERBOSE is True, issue detailed warning about any shape errors # encountered during the Shapefile to GeoJSON conversion - if VERBOSE and self._errors: + if VERBOSE and self._errors: header = 'Possible issue encountered when converting Shape #{} to GeoJSON: '.format(self.oid) orphans = self._errors.get('polygon_orphaned_holes', None) if orphans: @@ -616,7 +616,7 @@ def _from_geojson(geoj): else: raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) shape.shapeType = shapeType - + # set points and parts if geojType == "Point": shape.points = [ geoj["coordinates"] ] @@ -629,9 +629,9 @@ def _from_geojson(geoj): parts = [] index = 0 for i,ext_or_hole in enumerate(geoj["coordinates"]): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. if i == 0 and not is_cw(ext_or_hole): # flip exterior direction ext_or_hole = rewind(ext_or_hole) @@ -659,9 +659,9 @@ def _from_geojson(geoj): index = 0 for polygon in geoj["coordinates"]: for i,ext_or_hole in enumerate(polygon): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. if i == 0 and not is_cw(ext_or_hole): # flip exterior direction ext_or_hole = rewind(ext_or_hole) @@ -726,7 +726,7 @@ def __getattr__(self, item): :param item: The field name, used as attribute :return: Value of the field :raises: AttributeError, if item is not a field of the shapefile - and IndexError, if the field exists but the field's + and IndexError, if the field exists but the field's corresponding value in the Record does not exist """ try: @@ -821,8 +821,8 @@ def __dir__(self): """ default = list(dir(type(self))) # default list methods and attributes of this class fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) - return default + fnames - + return default + fnames + class ShapeRecord(object): """A ShapeRecord object containing a shape along with its attributes. Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" @@ -917,8 +917,8 @@ class Reader(object): but is not required to read the geometry from the .shp file. The "shapefile" argument in the constructor is the name of the file you want to open, and can be the path - to a shapefile on a local filesystem, inside a zipfile, - or a url. 
+ to a shapefile on a local filesystem, inside a zipfile, + or a url. You can instantiate a Reader without specifying a shapefile and then specify one later with the load() method. @@ -980,7 +980,7 @@ def __init__(self, *args, **kwargs): # Inspect zipfile contents to find the full shapefile path shapefiles = [name for name in archive.namelist() - if name.endswith('.shp')] + if (name.endswith('.SHP') or name.endswith('.shp'))] # The zipfile must contain exactly one shapefile if len(shapefiles) == 0: raise ShapefileException('Zipfile does not contain any shapefiles') @@ -991,14 +991,14 @@ def __init__(self, *args, **kwargs): path to the shapefile you would like to open.' % shapefiles ) # Try to extract file-like objects from zipfile shapefile = os.path.splitext(shapefile)[0] # root shapefile name - for ext in ['shp','shx','dbf']: + for ext in ['SHP','SHX','DBF','shp','shx','dbf']: try: member = archive.open(shapefile+'.'+ext) # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) fileobj.write(member.read()) fileobj.seek(0) - setattr(self, ext, fileobj) + setattr(self, ext.lower(), fileobj) self._files_to_close.append(fileobj) except: pass @@ -1047,7 +1047,7 @@ def __init__(self, *args, **kwargs): # Load and exit early self.load(path) return - + # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) if "shp" in kwargs.keys(): if hasattr(kwargs["shp"], "read"): @@ -1060,7 +1060,7 @@ def __init__(self, *args, **kwargs): else: (baseName, ext) = os.path.splitext(kwargs["shp"]) self.load_shp(baseName) - + if "shx" in kwargs.keys(): if hasattr(kwargs["shx"], "read"): self.shx = kwargs["shx"] @@ -1072,7 +1072,7 @@ def __init__(self, *args, **kwargs): else: (baseName, ext) = os.path.splitext(kwargs["shx"]) self.load_shx(baseName) - + if "dbf" in kwargs.keys(): if hasattr(kwargs["dbf"], "read"): self.dbf = kwargs["dbf"] @@ -1084,7 +1084,7 @@ def __init__(self, *args, **kwargs): else: (baseName, ext) = os.path.splitext(kwargs["dbf"]) self.load_dbf(baseName) - + # Load the files if self.shp or self.dbf: self.load() @@ -1120,9 +1120,9 @@ def __len__(self): # Preferably use dbf record count if self.numRecords is None: self.__dbfHeader() - + return self.numRecords - + elif self.shp: # Otherwise use shape count if self.shx: @@ -1130,7 +1130,7 @@ def __len__(self): self.__shxHeader() return self.numShapes - + else: # Index file not available, iterate all shapes to get total count if self.numShapes is None: @@ -1156,12 +1156,12 @@ def __len__(self): self._offsets = offsets # Return to previous file position shp.seek(checkpoint) - + return self.numShapes - + else: # No file loaded yet, treat as 'empty' shapefile - return 0 + return 0 def __iter__(self): """Iterates through the shapes/records in the shapefile.""" @@ -1376,7 +1376,7 @@ def __shape(self, oid=None, bbox=None): record.m = [None] # Seek to the end of this record as defined by the record header because # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. + # definition. Probably allowed for lazy feature deletion. f.seek(next) return record @@ -1418,8 +1418,8 @@ def __shapeIndex(self, i=None): def shape(self, i=0, bbox=None): """Returns a shape object for a shape in the geometry record file. - If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. 
+ If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. """ shp = self.__getFileObj(self.shp) i = self.__restrictIndex(i) @@ -1455,7 +1455,7 @@ def shape(self, i=0, bbox=None): def shapes(self, bbox=None): """Returns all shapes in a shapefile. To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ shapes = Shapes() shapes.extend(self.iterShapes(bbox=bbox)) @@ -1465,7 +1465,7 @@ def iterShapes(self, bbox=None): """Returns a generator of shapes in a shapefile. Useful for handling large shapefiles. To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ shp = self.__getFileObj(self.shp) # Found shapefiles which report incorrect @@ -1479,7 +1479,7 @@ def iterShapes(self, bbox=None): if self.numShapes: # Iterate exactly the number of shapes from shx header for i in xrange(self.numShapes): - # MAYBE: check if more left of file or exit early? + # MAYBE: check if more left of file or exit early? shape = self.__shape(oid=i, bbox=bbox) if shape: yield shape @@ -1500,7 +1500,7 @@ def iterShapes(self, bbox=None): # Entire shp file consumed # Update the number of shapes and list of offsets assert i == len(offsets) - self.numShapes = i + self.numShapes = i self._offsets = offsets def __dbfHeader(self): @@ -1530,7 +1530,7 @@ def __dbfHeader(self): terminator = dbf.read(1) if terminator != b"\r": raise ShapefileException("Shapefile dbf header lacks expected terminator. (likely corrupt?)") - + # insert deletion field at start self.fields.insert(0, ('DeletionFlag', 'C', 1, 0)) @@ -1546,9 +1546,9 @@ def __dbfHeader(self): self.__fullRecLookup = recLookup def __recordFmt(self, fields=None): - """Calculates the format and size of a .dbf record. Optional 'fields' arg + """Calculates the format and size of a .dbf record. Optional 'fields' arg specifies which fieldnames to unpack and which to ignore. Note that this - always includes the DeletionFlag at index 0, regardless of the 'fields' arg. + always includes the DeletionFlag at index 0, regardless of the 'fields' arg. """ if self.numRecords is None: self.__dbfHeader() @@ -1556,7 +1556,7 @@ def __recordFmt(self, fields=None): for fieldinfo in self.fields] if fields is not None: # only unpack specified fields, ignore others using padbytes (x) - structcodes = [code if fieldinfo[0] in fields + structcodes = [code if fieldinfo[0] in fields or fieldinfo[0] == 'DeletionFlag' # always unpack delflag else '%dx' % fieldinfo[2] for fieldinfo,code in zip(self.fields, structcodes)] @@ -1571,10 +1571,10 @@ def __recordFmt(self, fields=None): def __recordFields(self, fields=None): """Returns the necessary info required to unpack a record's fields, - restricted to a subset of fieldnames 'fields' if specified. - Returns a list of field info tuples, a name-index lookup dict, + restricted to a subset of fieldnames 'fields' if specified. + Returns a list of field info tuples, a name-index lookup dict, and a Struct instance for unpacking these fields. Note that DeletionFlag - is not a valid field. + is not a valid field. """ if fields is not None: # restrict info to the specified fields @@ -1604,13 +1604,13 @@ def __recordFields(self, fields=None): def __record(self, fieldTuples, recLookup, recStruct, oid=None): """Reads and returns a dbf record row as a list of values. 
Requires specifying - a list of field info tuples 'fieldTuples', a record name-index dict 'recLookup', - and a Struct instance 'recStruct' for unpacking these fields. + a list of field info tuples 'fieldTuples', a record name-index dict 'recLookup', + and a Struct instance 'recStruct' for unpacking these fields. """ f = self.__getFileObj(self.dbf) recordContents = recStruct.unpack(f.read(recStruct.size)) - + # deletion flag field is always unpacked as first value (see __recordFmt) if recordContents[0] != b' ': # deleted record @@ -1628,7 +1628,7 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): record = [] for (name, typ, size, deci),value in zip(fieldTuples, recordContents): if typ in ("N","F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b'\0')[0] value = value.replace(b'*', b'') # QGIS NULL is all '*' chars if value == b'': @@ -1645,7 +1645,7 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): # first try to force directly to int. # forcing a large int to float and back to int # will lose information and result in wrong nr. - value = int(value) + value = int(value) except ValueError: # forcing directly to int failed, so was probably a float. try: @@ -1689,7 +1689,7 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): def record(self, i=0, fields=None): """Returns a specific dbf record based on the supplied index. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. + list of one or more fieldnames. """ f = self.__getFileObj(self.dbf) if self.numRecords is None: @@ -1702,7 +1702,7 @@ def record(self, i=0, fields=None): return self.__record(oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct) def records(self, fields=None): - """Returns all records in a dbf file. + """Returns all records in a dbf file. To only read some of the fields, specify the 'fields' arg as a list of one or more fieldnames. """ @@ -1736,11 +1736,11 @@ def iterRecords(self, fields=None): def shapeRecord(self, i=0, fields=None, bbox=None): """Returns a combination geometry and attribute record for the - supplied record index. + supplied record index. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. - If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. + list of one or more fieldnames. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. """ i = self.__restrictIndex(i) shape = self.shape(i, bbox=bbox) @@ -1752,9 +1752,9 @@ def shapeRecords(self, fields=None, bbox=None): """Returns a list of combination geometry/attribute records for all records in a shapefile. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. + list of one or more fieldnames. To only read entries within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ return ShapeRecords(self.iterShapeRecords(fields=fields, bbox=bbox)) @@ -1762,9 +1762,9 @@ def iterShapeRecords(self, fields=None, bbox=None): """Returns a generator of combination geometry/attribute records for all records in a shapefile. 
To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. + list of one or more fieldnames. To only read entries within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ if bbox is None: # iterate through all shapes and records @@ -1773,13 +1773,13 @@ def iterShapeRecords(self, fields=None, bbox=None): else: # only iterate where shape.bbox overlaps with the given bbox # TODO: internal __record method should be faster but would have to - # make sure to seek to correct file location... + # make sure to seek to correct file location... #fieldTuples,recLookup,recStruct = self.__recordFields(fields) for shape in self.iterShapes(bbox=bbox): if shape: #record = self.__record(oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct) - record = self.record(i=shape.oid, fields=fields) + record = self.record(i=shape.oid, fields=fields) yield ShapeRecord(shape=shape, record=record) @@ -1810,8 +1810,8 @@ def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): else: raise Exception('Either the target filepath, or any of shp, shx, or dbf must be set to create a shapefile.') # Initiate with empty headers, to be finalized upon closing - if self.shp: self.shp.write(b'9'*100) - if self.shx: self.shx.write(b'9'*100) + if self.shp: self.shp.write(b'9'*100) + if self.shx: self.shx.write(b'9'*100) # Geometry record offsets and lengths for writing shx file. self.recNum = 0 self.shpNum = 0 @@ -1819,16 +1819,16 @@ def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): self._zbox = None self._mbox = None # Use deletion flags in dbf? Default is false (0). Note: Currently has no effect, records should NOT contain deletion flags. - self.deletionFlag = 0 + self.deletionFlag = 0 # Encoding self.encoding = kwargs.pop('encoding', 'utf-8') self.encodingErrors = kwargs.pop('encodingErrors', 'strict') def __len__(self): - """Returns the current number of features written to the shapefile. + """Returns the current number of features written to the shapefile. If shapes and records are unbalanced, the length is considered the highest of the two.""" - return max(self.recNum, self.shpNum) + return max(self.recNum, self.shpNum) def __enter__(self): """ @@ -1853,7 +1853,7 @@ def close(self): shp_open = self.shp and not (hasattr(self.shp, 'closed') and self.shp.closed) shx_open = self.shx and not (hasattr(self.shx, 'closed') and self.shx.closed) dbf_open = self.dbf and not (hasattr(self.dbf, 'closed') and self.dbf.closed) - + # Balance if already not balanced if self.shp and shp_open and self.dbf and dbf_open: if self.autoBalance: @@ -1925,8 +1925,8 @@ def __bbox(self, s): y.extend(py) else: # this should not happen. - # any shape that is not null should have at least one point, and only those should be sent here. - # could also mean that earlier code failed to add points to a non-null shape. + # any shape that is not null should have at least one point, and only those should be sent here. + # could also mean that earlier code failed to add points to a non-null shape. raise Exception("Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." % s.shapeType) bbox = [min(x), min(y), max(x), max(y)] # update global @@ -2026,7 +2026,7 @@ def __shapefileHeader(self, fileObj, headerType='shp'): # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. 
# Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions.
 # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles.
- bbox = [0,0,0,0]
 f.write(pack("<4d", *bbox))
 except error:
 raise ShapefileException("Failed to write shapefile bounding box. Floats required.")
@@ -2170,7 +2170,7 @@ def __shpRecord(self, s):
 f.write(pack("<%sd" % len(s.z), *s.z))
 else:
 # if z values are stored as 3rd dimension
- [f.write(pack("<d", p[2] if len(p) > 2 else 0)) for p in s.points]
+ [f.write(pack("<d", p[2] if len(p) > 2 else 0)) for p in s.points]
 except error:
 raise ShapefileException("Failed to write elevation values for record %s. Expected floats." % self.shpNum)
 # Write m extremes and values
@@ -2182,7 +2182,7 @@ def __shpRecord(self, s):
 except error:
 raise ShapefileException("Failed to write measure extremes for record %s. Expected floats" % self.shpNum)
 try:
- if hasattr(s,"m"):
+ if hasattr(s,"m"):
 # if m values are stored in attribute
 f.write(pack("<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m]))
 else:
@@ -2230,7 +2230,7 @@ def __shpRecord(self, s):
 # if m values are stored in attribute
 try:
 if not s.m or s.m[0] is None:
- s.m = (NODATA,)
+ s.m = (NODATA,)
 f.write(pack("<1d", s.m[0]))
 except error:
 raise ShapefileException("Failed to write measure value for record %s. Expected floats." % self.shpNum)
@@ -2275,7 +2275,7 @@ def record(self, *recordList, **recordDict):
 # Balance if already not balanced
 if self.autoBalance and self.recNum > self.shpNum:
 self.balance()
-
+
 fieldCount = sum((1 for field in self.fields if field[0] != 'DeletionFlag'))
 if recordList:
 record = list(recordList)
@@ -2313,7 +2313,7 @@ def __dbfRecord(self, record):
 self.recNum += 1
 fields = (field for field in self.fields if field[0] != 'DeletionFlag') # ignore deletionflag field in case it was specified
 for (fieldName, fieldType, size, deci), value in zip(fields, record):
- # write
+ # write
 fieldType = fieldType.upper()
 size = int(size)
 if fieldType in ("N","F"):
@@ -2326,7 +2326,7 @@ def __dbfRecord(self, record):
 # first try to force directly to int.
 # forcing a large int to float and back to int
 # will lose information and result in wrong nr.
- value = int(value)
+ value = int(value)
 except ValueError:
 # forcing directly to int failed, so was probably a float.
 value = int(float(value))
@@ -2406,7 +2406,7 @@ def pointz(self, x, y, z=0, m=None):
 pointShape = Shape(shapeType)
 pointShape.points.append([x, y, z, m])
 self.shape(pointShape)
-
+
 def multipoint(self, points):
 """Creates a MULTIPOINT shape.
@@ -2591,8 +2591,8 @@ def field(self, name, fieldType="C", size="50", decimal=0):
 ## be written exclusively using saveShp, saveShx, and saveDbf respectively.
 ## If target is specified but not shp, shx, or dbf then the target path and
 ## file name are used. If no options or specified, a unique base file name
-## is generated to save the files and the base file name is returned as a
-## string.
+## is generated to save the files and the base file name is returned as a
+## string.
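For context on the m-value handling that several hunks above touch: a measure left as None is written out as the NODATA sentinel (-10e38) and read back as None. A minimal round-trip sketch (the file name 'pointz_demo' is arbitrary and is created in the working directory):

    import shapefile

    with shapefile.Writer('pointz_demo', shapeType=shapefile.POINTZ) as w:
        w.field('name', 'C')
        w.pointz(1.0, 2.0, z=3.5)   # m is left as None
        w.record('summit')

    with shapefile.Reader('pointz_demo') as r:
        shape = r.shape(0)
        print(shape.z)   # [3.5]
        print(shape.m)   # [None]: the NODATA sentinel round-trips as None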
## """ ## # Balance if already not balanced ## if shp and dbf: @@ -2615,7 +2615,7 @@ def field(self, name, fieldType="C", size="50", decimal=0): ## if not target: ## temp = tempfile.NamedTemporaryFile(prefix="shapefile_",dir=os.getcwd()) ## target = temp.name -## generated = True +## generated = True ## self.saveShp(target) ## self.shp.close() ## self.saveShx(target) @@ -2661,7 +2661,7 @@ def summarize(self): runner.summarize(verbosity) return failure_count - + if __name__ == "__main__": """ Doctests are contained in the file 'README.md', and are tested using the built-in From 0b30ea1a0a2af81db32e3cc5fd66a423fb9ef970 Mon Sep 17 00:00:00 2001 From: bva-bme <62710558+bva-bme@users.noreply.github.com> Date: Tue, 8 Nov 2022 11:34:19 +0100 Subject: [PATCH 003/115] Added uppercase file extension compatibility for zip files --- shapefile.py | 194 +++++++++++++++++++++++++-------------------------- 1 file changed, 97 insertions(+), 97 deletions(-) diff --git a/shapefile.py b/shapefile.py index 19e53940..feb7af4c 100644 --- a/shapefile.py +++ b/shapefile.py @@ -85,7 +85,7 @@ from urllib.parse import urlparse, urlunparse from urllib.error import HTTPError from urllib.request import urlopen, Request - + else: from itertools import izip @@ -97,7 +97,7 @@ # Helpers MISSING = [None,''] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. +NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. if PYTHON3: def b(v, encoding='utf-8', encodingErrors='strict'): @@ -207,7 +207,7 @@ def signed_area(coords, fast=False): def is_cw(coords): """Returns True if a polygon ring has clockwise orientation, determined - by a negatively signed area. + by a negatively signed area. """ area2 = signed_area(coords, fast=True) return area2 < 0 @@ -245,7 +245,7 @@ def ring_contains_point(coords, p): Adapted from code by Eric Haynes http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c - + Original description: Shoot a test ray along +X axis. The strategy, from MacMartin, is to compare vertex Y values to the testing point's Y and quickly discard @@ -258,11 +258,11 @@ def ring_contains_point(coords, p): yflag0 = ( vtx0[1] >= ty ) inside_flag = False - for vtx1 in coords[1:]: + for vtx1 in coords[1:]: yflag1 = ( vtx1[1] >= ty ) # check if endpoints straddle (are on opposite sides) of X axis # (i.e. the Y's differ); if so, +X ray could intersect this edge. - if yflag0 != yflag1: + if yflag0 != yflag1: xflag0 = ( vtx0[0] >= tx ) # check if endpoints are on same side of the Y axis (i.e. X's # are the same); if so, it's easy to test if edge hits or misses. @@ -287,7 +287,7 @@ def ring_sample(coords, ccw=False): finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. The orientation of the ring is assumed to be clockwise, unless ccw - (counter-clockwise) is set to True. + (counter-clockwise) is set to True. 
""" triplet = [] def itercoords(): @@ -296,12 +296,12 @@ def itercoords(): yield p # finally, yield the second coordinate to the end to allow checking the last triplet yield coords[1] - - for p in itercoords(): + + for p in itercoords(): # add point to triplet (but not if duplicate) if p not in triplet: triplet.append(p) - + # new triplet, try to get sample if len(triplet) == 3: # check that triplet does not form a straight line (not a triangle) @@ -322,7 +322,7 @@ def itercoords(): # failed to get sample point from this triplet # remove oldest triplet coord to allow iterating to next triplet triplet.pop(0) - + else: raise Exception('Unexpected error: Unable to find a ring sample point.') @@ -334,14 +334,14 @@ def ring_contains_ring(coords1, coords2): def organize_polygon_rings(rings, return_errors=None): '''Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior - ring, and one or more interior holes. If a return_errors dict is provided (optional), - any errors encountered will be added to it. + ring, and one or more interior holes. If a return_errors dict is provided (optional), + any errors encountered will be added to it. Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). Rings are determined as exteriors if they run in clockwise direction, or interior holes if they run in counter-clockwise direction. This method is used to construct GeoJSON (multi)polygons from the shapefile polygon shape type, which does not - explicitly store the structure of the polygons beyond exterior/interior ring orientation. + explicitly store the structure of the polygons beyond exterior/interior ring orientation. ''' # first iterate rings and classify as exterior or hole exteriors = [] @@ -355,7 +355,7 @@ def organize_polygon_rings(rings, return_errors=None): else: # ring is a hole holes.append(ring) - + # if only one exterior, then all holes belong to that exterior if len(exteriors) == 1: # exit early @@ -374,7 +374,7 @@ def organize_polygon_rings(rings, return_errors=None): poly = [ext] polys.append(poly) return polys - + # first determine each hole's candidate exteriors based on simple bbox contains test hole_exteriors = dict([(hole_i,[]) for hole_i in xrange(len(holes))]) exterior_bboxes = [ring_bbox(ring) for ring in exteriors] @@ -386,7 +386,7 @@ def organize_polygon_rings(rings, return_errors=None): # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test for hole_i,exterior_candidates in hole_exteriors.items(): - + if len(exterior_candidates) > 1: # get hole sample point ccw = not is_cw(holes[hole_i]) @@ -404,7 +404,7 @@ def organize_polygon_rings(rings, return_errors=None): # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole for hole_i,exterior_candidates in hole_exteriors.items(): - + if len(exterior_candidates) > 1: # exterior candidate with the smallest area is the hole's most immediate parent ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x], fast=True)))[0] @@ -463,17 +463,17 @@ def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid= geometry record then those shapes are called parts. Parts are designated by their starting index in geometry record's list of shapes. For MultiPatch geometry, partTypes designates - the patch type of each of the parts. + the patch type of each of the parts. 
""" self.shapeType = shapeType self.points = points or [] self.parts = parts or [] if partTypes: self.partTypes = partTypes - + # and a dict to silently record any errors encountered self._errors = {} - + # add oid if oid is not None: self.__oid = oid @@ -557,12 +557,12 @@ def __geo_interface__(self): rings.append(ring) # organize rings into list of polygons, where each polygon is defined as list of rings. - # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). + # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). polys = organize_polygon_rings(rings, self._errors) - + # if VERBOSE is True, issue detailed warning about any shape errors # encountered during the Shapefile to GeoJSON conversion - if VERBOSE and self._errors: + if VERBOSE and self._errors: header = 'Possible issue encountered when converting Shape #{} to GeoJSON: '.format(self.oid) orphans = self._errors.get('polygon_orphaned_holes', None) if orphans: @@ -616,7 +616,7 @@ def _from_geojson(geoj): else: raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) shape.shapeType = shapeType - + # set points and parts if geojType == "Point": shape.points = [ geoj["coordinates"] ] @@ -629,9 +629,9 @@ def _from_geojson(geoj): parts = [] index = 0 for i,ext_or_hole in enumerate(geoj["coordinates"]): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. if i == 0 and not is_cw(ext_or_hole): # flip exterior direction ext_or_hole = rewind(ext_or_hole) @@ -659,9 +659,9 @@ def _from_geojson(geoj): index = 0 for polygon in geoj["coordinates"]: for i,ext_or_hole in enumerate(polygon): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. if i == 0 and not is_cw(ext_or_hole): # flip exterior direction ext_or_hole = rewind(ext_or_hole) @@ -726,7 +726,7 @@ def __getattr__(self, item): :param item: The field name, used as attribute :return: Value of the field :raises: AttributeError, if item is not a field of the shapefile - and IndexError, if the field exists but the field's + and IndexError, if the field exists but the field's corresponding value in the Record does not exist """ try: @@ -821,8 +821,8 @@ def __dir__(self): """ default = list(dir(type(self))) # default list methods and attributes of this class fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) - return default + fnames - + return default + fnames + class ShapeRecord(object): """A ShapeRecord object containing a shape along with its attributes. Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" @@ -917,8 +917,8 @@ class Reader(object): but is not required to read the geometry from the .shp file. The "shapefile" argument in the constructor is the name of the file you want to open, and can be the path - to a shapefile on a local filesystem, inside a zipfile, - or a url. 
+ to a shapefile on a local filesystem, inside a zipfile, + or a url. You can instantiate a Reader without specifying a shapefile and then specify one later with the load() method. @@ -1047,7 +1047,7 @@ def __init__(self, *args, **kwargs): # Load and exit early self.load(path) return - + # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) if "shp" in kwargs.keys(): if hasattr(kwargs["shp"], "read"): @@ -1060,7 +1060,7 @@ def __init__(self, *args, **kwargs): else: (baseName, ext) = os.path.splitext(kwargs["shp"]) self.load_shp(baseName) - + if "shx" in kwargs.keys(): if hasattr(kwargs["shx"], "read"): self.shx = kwargs["shx"] @@ -1072,7 +1072,7 @@ def __init__(self, *args, **kwargs): else: (baseName, ext) = os.path.splitext(kwargs["shx"]) self.load_shx(baseName) - + if "dbf" in kwargs.keys(): if hasattr(kwargs["dbf"], "read"): self.dbf = kwargs["dbf"] @@ -1084,7 +1084,7 @@ def __init__(self, *args, **kwargs): else: (baseName, ext) = os.path.splitext(kwargs["dbf"]) self.load_dbf(baseName) - + # Load the files if self.shp or self.dbf: self.load() @@ -1120,9 +1120,9 @@ def __len__(self): # Preferably use dbf record count if self.numRecords is None: self.__dbfHeader() - + return self.numRecords - + elif self.shp: # Otherwise use shape count if self.shx: @@ -1130,7 +1130,7 @@ def __len__(self): self.__shxHeader() return self.numShapes - + else: # Index file not available, iterate all shapes to get total count if self.numShapes is None: @@ -1156,12 +1156,12 @@ def __len__(self): self._offsets = offsets # Return to previous file position shp.seek(checkpoint) - + return self.numShapes - + else: # No file loaded yet, treat as 'empty' shapefile - return 0 + return 0 def __iter__(self): """Iterates through the shapes/records in the shapefile.""" @@ -1376,7 +1376,7 @@ def __shape(self, oid=None, bbox=None): record.m = [None] # Seek to the end of this record as defined by the record header because # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. + # definition. Probably allowed for lazy feature deletion. f.seek(next) return record @@ -1418,8 +1418,8 @@ def __shapeIndex(self, i=None): def shape(self, i=0, bbox=None): """Returns a shape object for a shape in the geometry record file. - If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. """ shp = self.__getFileObj(self.shp) i = self.__restrictIndex(i) @@ -1455,7 +1455,7 @@ def shape(self, i=0, bbox=None): def shapes(self, bbox=None): """Returns all shapes in a shapefile. To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ shapes = Shapes() shapes.extend(self.iterShapes(bbox=bbox)) @@ -1465,7 +1465,7 @@ def iterShapes(self, bbox=None): """Returns a generator of shapes in a shapefile. Useful for handling large shapefiles. To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. 
""" shp = self.__getFileObj(self.shp) # Found shapefiles which report incorrect @@ -1479,7 +1479,7 @@ def iterShapes(self, bbox=None): if self.numShapes: # Iterate exactly the number of shapes from shx header for i in xrange(self.numShapes): - # MAYBE: check if more left of file or exit early? + # MAYBE: check if more left of file or exit early? shape = self.__shape(oid=i, bbox=bbox) if shape: yield shape @@ -1500,7 +1500,7 @@ def iterShapes(self, bbox=None): # Entire shp file consumed # Update the number of shapes and list of offsets assert i == len(offsets) - self.numShapes = i + self.numShapes = i self._offsets = offsets def __dbfHeader(self): @@ -1530,7 +1530,7 @@ def __dbfHeader(self): terminator = dbf.read(1) if terminator != b"\r": raise ShapefileException("Shapefile dbf header lacks expected terminator. (likely corrupt?)") - + # insert deletion field at start self.fields.insert(0, ('DeletionFlag', 'C', 1, 0)) @@ -1546,9 +1546,9 @@ def __dbfHeader(self): self.__fullRecLookup = recLookup def __recordFmt(self, fields=None): - """Calculates the format and size of a .dbf record. Optional 'fields' arg + """Calculates the format and size of a .dbf record. Optional 'fields' arg specifies which fieldnames to unpack and which to ignore. Note that this - always includes the DeletionFlag at index 0, regardless of the 'fields' arg. + always includes the DeletionFlag at index 0, regardless of the 'fields' arg. """ if self.numRecords is None: self.__dbfHeader() @@ -1556,7 +1556,7 @@ def __recordFmt(self, fields=None): for fieldinfo in self.fields] if fields is not None: # only unpack specified fields, ignore others using padbytes (x) - structcodes = [code if fieldinfo[0] in fields + structcodes = [code if fieldinfo[0] in fields or fieldinfo[0] == 'DeletionFlag' # always unpack delflag else '%dx' % fieldinfo[2] for fieldinfo,code in zip(self.fields, structcodes)] @@ -1571,10 +1571,10 @@ def __recordFmt(self, fields=None): def __recordFields(self, fields=None): """Returns the necessary info required to unpack a record's fields, - restricted to a subset of fieldnames 'fields' if specified. - Returns a list of field info tuples, a name-index lookup dict, + restricted to a subset of fieldnames 'fields' if specified. + Returns a list of field info tuples, a name-index lookup dict, and a Struct instance for unpacking these fields. Note that DeletionFlag - is not a valid field. + is not a valid field. """ if fields is not None: # restrict info to the specified fields @@ -1604,13 +1604,13 @@ def __recordFields(self, fields=None): def __record(self, fieldTuples, recLookup, recStruct, oid=None): """Reads and returns a dbf record row as a list of values. Requires specifying - a list of field info tuples 'fieldTuples', a record name-index dict 'recLookup', - and a Struct instance 'recStruct' for unpacking these fields. + a list of field info tuples 'fieldTuples', a record name-index dict 'recLookup', + and a Struct instance 'recStruct' for unpacking these fields. """ f = self.__getFileObj(self.dbf) recordContents = recStruct.unpack(f.read(recStruct.size)) - + # deletion flag field is always unpacked as first value (see __recordFmt) if recordContents[0] != b' ': # deleted record @@ -1628,7 +1628,7 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): record = [] for (name, typ, size, deci),value in zip(fieldTuples, recordContents): if typ in ("N","F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. 
+ # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b'\0')[0] value = value.replace(b'*', b'') # QGIS NULL is all '*' chars if value == b'': @@ -1645,7 +1645,7 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): # first try to force directly to int. # forcing a large int to float and back to int # will lose information and result in wrong nr. - value = int(value) + value = int(value) except ValueError: # forcing directly to int failed, so was probably a float. try: @@ -1689,7 +1689,7 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): def record(self, i=0, fields=None): """Returns a specific dbf record based on the supplied index. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. + list of one or more fieldnames. """ f = self.__getFileObj(self.dbf) if self.numRecords is None: @@ -1702,7 +1702,7 @@ def record(self, i=0, fields=None): return self.__record(oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct) def records(self, fields=None): - """Returns all records in a dbf file. + """Returns all records in a dbf file. To only read some of the fields, specify the 'fields' arg as a list of one or more fieldnames. """ @@ -1736,11 +1736,11 @@ def iterRecords(self, fields=None): def shapeRecord(self, i=0, fields=None, bbox=None): """Returns a combination geometry and attribute record for the - supplied record index. + supplied record index. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. - If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. + list of one or more fieldnames. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. """ i = self.__restrictIndex(i) shape = self.shape(i, bbox=bbox) @@ -1752,9 +1752,9 @@ def shapeRecords(self, fields=None, bbox=None): """Returns a list of combination geometry/attribute records for all records in a shapefile. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. + list of one or more fieldnames. To only read entries within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ return ShapeRecords(self.iterShapeRecords(fields=fields, bbox=bbox)) @@ -1762,9 +1762,9 @@ def iterShapeRecords(self, fields=None, bbox=None): """Returns a generator of combination geometry/attribute records for all records in a shapefile. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. + list of one or more fieldnames. To only read entries within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ if bbox is None: # iterate through all shapes and records @@ -1773,13 +1773,13 @@ def iterShapeRecords(self, fields=None, bbox=None): else: # only iterate where shape.bbox overlaps with the given bbox # TODO: internal __record method should be faster but would have to - # make sure to seek to correct file location... + # make sure to seek to correct file location... 
#fieldTuples,recLookup,recStruct = self.__recordFields(fields) for shape in self.iterShapes(bbox=bbox): if shape: #record = self.__record(oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct) - record = self.record(i=shape.oid, fields=fields) + record = self.record(i=shape.oid, fields=fields) yield ShapeRecord(shape=shape, record=record) @@ -1810,8 +1810,8 @@ def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): else: raise Exception('Either the target filepath, or any of shp, shx, or dbf must be set to create a shapefile.') # Initiate with empty headers, to be finalized upon closing - if self.shp: self.shp.write(b'9'*100) - if self.shx: self.shx.write(b'9'*100) + if self.shp: self.shp.write(b'9'*100) + if self.shx: self.shx.write(b'9'*100) # Geometry record offsets and lengths for writing shx file. self.recNum = 0 self.shpNum = 0 @@ -1819,16 +1819,16 @@ def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): self._zbox = None self._mbox = None # Use deletion flags in dbf? Default is false (0). Note: Currently has no effect, records should NOT contain deletion flags. - self.deletionFlag = 0 + self.deletionFlag = 0 # Encoding self.encoding = kwargs.pop('encoding', 'utf-8') self.encodingErrors = kwargs.pop('encodingErrors', 'strict') def __len__(self): - """Returns the current number of features written to the shapefile. + """Returns the current number of features written to the shapefile. If shapes and records are unbalanced, the length is considered the highest of the two.""" - return max(self.recNum, self.shpNum) + return max(self.recNum, self.shpNum) def __enter__(self): """ @@ -1853,7 +1853,7 @@ def close(self): shp_open = self.shp and not (hasattr(self.shp, 'closed') and self.shp.closed) shx_open = self.shx and not (hasattr(self.shx, 'closed') and self.shx.closed) dbf_open = self.dbf and not (hasattr(self.dbf, 'closed') and self.dbf.closed) - + # Balance if already not balanced if self.shp and shp_open and self.dbf and dbf_open: if self.autoBalance: @@ -1925,8 +1925,8 @@ def __bbox(self, s): y.extend(py) else: # this should not happen. - # any shape that is not null should have at least one point, and only those should be sent here. - # could also mean that earlier code failed to add points to a non-null shape. + # any shape that is not null should have at least one point, and only those should be sent here. + # could also mean that earlier code failed to add points to a non-null shape. raise Exception("Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." % s.shapeType) bbox = [min(x), min(y), max(x), max(y)] # update global @@ -2026,7 +2026,7 @@ def __shapefileHeader(self, fileObj, headerType='shp'): # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. - bbox = [0,0,0,0] + bbox = [0,0,0,0] f.write(pack("<4d", *bbox)) except error: raise ShapefileException("Failed to write shapefile bounding box. 
Floats required.")
@@ -2170,7 +2170,7 @@ def __shpRecord(self, s):
 f.write(pack("<%sd" % len(s.z), *s.z))
 else:
 # if z values are stored as 3rd dimension
- [f.write(pack("<d", p[2] if len(p) > 2 else 0)) for p in s.points]
+ [f.write(pack("<d", p[2] if len(p) > 2 else 0)) for p in s.points]
 except error:
 raise ShapefileException("Failed to write elevation values for record %s. Expected floats." % self.shpNum)
 # Write m extremes and values
@@ -2182,7 +2182,7 @@ def __shpRecord(self, s):
 except error:
 raise ShapefileException("Failed to write measure extremes for record %s. Expected floats" % self.shpNum)
 try:
- if hasattr(s,"m"):
+ if hasattr(s,"m"):
 # if m values are stored in attribute
 f.write(pack("<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m]))
 else:
@@ -2230,7 +2230,7 @@ def __shpRecord(self, s):
 # if m values are stored in attribute
 try:
 if not s.m or s.m[0] is None:
- s.m = (NODATA,)
+ s.m = (NODATA,)
 f.write(pack("<1d", s.m[0]))
 except error:
 raise ShapefileException("Failed to write measure value for record %s. Expected floats." % self.shpNum)
@@ -2275,7 +2275,7 @@ def record(self, *recordList, **recordDict):
 # Balance if already not balanced
 if self.autoBalance and self.recNum > self.shpNum:
 self.balance()
-
+
 fieldCount = sum((1 for field in self.fields if field[0] != 'DeletionFlag'))
 if recordList:
 record = list(recordList)
@@ -2313,7 +2313,7 @@ def __dbfRecord(self, record):
 self.recNum += 1
 fields = (field for field in self.fields if field[0] != 'DeletionFlag') # ignore deletionflag field in case it was specified
 for (fieldName, fieldType, size, deci), value in zip(fields, record):
- # write
+ # write
 fieldType = fieldType.upper()
 size = int(size)
 if fieldType in ("N","F"):
@@ -2326,7 +2326,7 @@ def __dbfRecord(self, record):
 # first try to force directly to int.
 # forcing a large int to float and back to int
 # will lose information and result in wrong nr.
- value = int(value)
+ value = int(value)
 except ValueError:
 # forcing directly to int failed, so was probably a float.
 value = int(float(value))
@@ -2406,7 +2406,7 @@ def pointz(self, x, y, z=0, m=None):
 pointShape = Shape(shapeType)
 pointShape.points.append([x, y, z, m])
 self.shape(pointShape)
-
+
 def multipoint(self, points):
 """Creates a MULTIPOINT shape.
@@ -2591,8 +2591,8 @@ def field(self, name, fieldType="C", size="50", decimal=0):
 ## be written exclusively using saveShp, saveShx, and saveDbf respectively.
 ## If target is specified but not shp, shx, or dbf then the target path and
 ## file name are used. If no options or specified, a unique base file name
-## is generated to save the files and the base file name is returned as a
-## string.
+## is generated to save the files and the base file name is returned as a
+## string.
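The two patches above fix the zip member lookup by whitelisting '.SHP' and '.shp' explicitly. An alternative that also covers mixed-case names such as 'Roads.Shp' is to normalize the extension before comparing. A standalone sketch of that idea, not the patch's code ('archive.zip' is a placeholder):

    import os
    import zipfile

    def find_shapefiles(zip_path):
        """List zip members whose extension is .shp in any letter case."""
        with zipfile.ZipFile(zip_path) as archive:
            return [name for name in archive.namelist()
                    if os.path.splitext(name)[1].lower() == '.shp']

    print(find_shapefiles('archive.zip'))  # e.g. ['roads.shp', 'RIVERS.SHP']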
## """ ## # Balance if already not balanced ## if shp and dbf: @@ -2615,7 +2615,7 @@ def field(self, name, fieldType="C", size="50", decimal=0): ## if not target: ## temp = tempfile.NamedTemporaryFile(prefix="shapefile_",dir=os.getcwd()) ## target = temp.name -## generated = True +## generated = True ## self.saveShp(target) ## self.shp.close() ## self.saveShx(target) @@ -2661,7 +2661,7 @@ def summarize(self): runner.summarize(verbosity) return failure_count - + if __name__ == "__main__": """ Doctests are contained in the file 'README.md', and are tested using the built-in From c35592c2654617e1cce0f0968aa7c55156667b34 Mon Sep 17 00:00:00 2001 From: "GeospatialPython.com" Date: Thu, 2 Feb 2023 15:34:15 -0600 Subject: [PATCH 004/115] Create FUNDING.yml --- .github/FUNDING.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..ad565874 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,13 @@ +# These are supported funding model platforms + +github: [geospatialpython] +patreon: # Replace with a single Patreon username +open_collective: # Replace with a single Open Collective username +ko_fi: # Replace with a single Ko-fi username +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +otechie: # Replace with a single Otechie username +lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry +custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] From ec5f814736f6b3243b08610ed265d651d277fbe6 Mon Sep 17 00:00:00 2001 From: midichef <67946319+midichef@users.noreply.github.com> Date: Fri, 28 Jul 2023 16:43:17 -0700 Subject: [PATCH 005/115] Fix recursion error when copying Record with deepcopy() --- shapefile.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/shapefile.py b/shapefile.py index 04fb5ec0..6057d7cd 100644 --- a/shapefile.py +++ b/shapefile.py @@ -730,6 +730,8 @@ def __getattr__(self, item): corresponding value in the Record does not exist """ try: + if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() + raise AttributeError('_Record does not implement __setstate__') index = self.__field_positions[item] return list.__getitem__(self, index) except KeyError: From d3b6e2a91668867e4f2168d32b8f2c53abfb09d6 Mon Sep 17 00:00:00 2001 From: "GeospatialPython.com" Date: Thu, 17 Aug 2023 21:20:09 -0500 Subject: [PATCH 006/115] Update README.md Added .prj information based on some complaints in blog posts and youtube videos about PyShp. --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 042e11c4..dce792cc 100644 --- a/README.md +++ b/README.md @@ -1077,7 +1077,20 @@ If you do not use the autoBalance() or balance() method and forget to manually balance the geometry and attributes the shapefile will be viewed as corrupt by most shapefile software. +### Writing .prj files +A .prj file, or projection file, is a simple text file that stores a shapefile's map projection and coordinate reference system to help mapping software properly locate the geometry on a map. If you don't have one, you may get confusing errors when you try and use the shapefile you created. 
The GIS software may complain that it doesn't know the shapefile's projection and refuse to accept it, it may assume the shapefile is the same projection as the rest of your GIS project and put it in the wrong place, or it might assume the coordinates are an offset in meters from latitude and longitude 0,0 which will put your data in the middle of the ocean near Africa. The text in the .prj file is a [Well-Known-Text (WKT) projection string](https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems). Projection strings can be quite long so they are often referenced using numeric codes call EPSG codes. The .prj file must have the same base name as your shapefile. So for example if you have a shapefile named "myPoints.shp", the .prj file must be named "myPoints.prj". +If you're using the same projection over and over, the following is a simple way to create the .prj file assuming your base filename is stored in a variable called "filename": + + >>> with open("{}.prj".format(filename), "w") as prj: + >>> wkt = 'GEOGCS["WGS 84",' + >>> wkt += 'DATUM["WGS_1984",' + >>> wkt += 'SPHEROID["WGS 84",6378137,298.257223563]]' + >>> wkt += ',PRIMEM["Greenwich",0],' + >>> wkt += 'UNIT["degree",0.0174532925199433]]' + >>> prj.write(wkt) + +If you need to dynamically fetch WKT projection strings, you can use the pure Python [PyCRS](https://github.com/karimbahgat/PyCRS) module which has a number of useful features. # Advanced Use From 9d31036a194b490517c75ddfc7c15a3fda82056f Mon Sep 17 00:00:00 2001 From: lgolston <30876419+lgolston@users.noreply.github.com> Date: Thu, 24 Aug 2023 11:20:29 -0500 Subject: [PATCH 007/115] bbox filtering for single points --- README.md | 2 +- shapefile.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index dce792cc..aac57ed7 100644 --- a/README.md +++ b/README.md @@ -1224,7 +1224,7 @@ Selectively reading only the necessary data in this way is particularly useful f ### Spatial filtering -Another common use-case is that we only want to read those records that are located in some region of interest. Because the shapefile stores the bounding box of each shape separately from the geometry data, it's possible to quickly retrieve all shapes that might overlap a given bounding box region without having to load the full shape geometry data for every shape. This can be done by specifying the `bbox` argument to any of the record or shape methods: +Another common use-case is that we only want to read those records that are located in some region of interest. Because the shapefile stores the bounding box of each shape separately from the geometry data, it's possible to quickly retrieve all shapes that might overlap a given bounding box region without having to load the full shape geometry data for every shape. 
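On the .prj advice in [PATCH 006] above: since the .prj must share the shapefile's base name, deriving it from the .shp path avoids mismatches. A small sketch using only the standard library and the same WGS 84 WKT string as the README (the `myPoints.shp` path is illustrative):

```
import os

def write_prj(shp_path, wkt):
    # myPoints.shp -> myPoints.prj: same base name, .prj extension
    base = os.path.splitext(shp_path)[0]
    with open(base + ".prj", "w") as prj:
        prj.write(wkt)

wgs84 = ('GEOGCS["WGS 84",DATUM["WGS_1984",'
         'SPHEROID["WGS 84",6378137,298.257223563]],'
         'PRIMEM["Greenwich",0],'
         'UNIT["degree",0.0174532925199433]]')

write_prj("myPoints.shp", wgs84)   # creates myPoints.prj
```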
This can be done by specifying the `bbox` argument to the shapes, iterShapes, or iterShapeRecords methods: >>> bbox = [36.423, 12.360, 43.123, 18.004] # ca bbox of Eritrea diff --git a/shapefile.py b/shapefile.py index 04fb5ec0..0bf8c5f6 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1360,6 +1360,12 @@ def __shape(self, oid=None, bbox=None): # Read a single point if shapeType in (1,11,21): record.points = [_Array('d', unpack("<2d", f.read(16)))] + # create bounding box for Point by duplicating coordinates + point_bbox = list(record.points[0] + record.points[0]) + # skip shape if no overlap with bounding box + if not bbox_overlap(bbox, point_bbox): + f.seek(next) + return None # Read a single Z value if shapeType == 11: record.z = list(unpack(" Date: Thu, 24 Aug 2023 13:18:18 -0500 Subject: [PATCH 008/115] test equality for string, numbers --- test_shapefile.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test_shapefile.py b/test_shapefile.py index 93bb6049..15bc42f5 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -411,9 +411,9 @@ def test_reader_shapefile_type(): is returned correctly. """ with shapefile.Reader("shapefiles/blockgroups") as sf: - assert sf.shapeType is 5 # 5 means Polygon - assert sf.shapeType is shapefile.POLYGON - assert sf.shapeTypeName is "POLYGON" + assert sf.shapeType == 5 # 5 means Polygon + assert sf.shapeType == shapefile.POLYGON + assert sf.shapeTypeName == "POLYGON" def test_reader_shapefile_length(): @@ -429,9 +429,9 @@ def test_reader_shapefile_length(): def test_shape_metadata(): with shapefile.Reader("shapefiles/blockgroups") as sf: shape = sf.shape(0) - assert shape.shapeType is 5 # Polygon - assert shape.shapeType is shapefile.POLYGON - assert sf.shapeTypeName is "POLYGON" + assert shape.shapeType == 5 # Polygon + assert shape.shapeType == shapefile.POLYGON + assert sf.shapeTypeName == "POLYGON" def test_reader_fields(): @@ -497,9 +497,9 @@ def test_reader_shp_shx_only(): with shapefile.Reader(shp="shapefiles/blockgroups.shp", shx="shapefiles/blockgroups.shx") as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 + - def test_reader_shp_dbf_only(): """ Assert that specifying just the @@ -509,7 +509,7 @@ def test_reader_shp_dbf_only(): with shapefile.Reader(shp="shapefiles/blockgroups.shp", dbf="shapefiles/blockgroups.dbf") as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 record = sf.record(3) assert record[1:3] == ['060750601001', 4715] @@ -523,7 +523,7 @@ def test_reader_shp_only(): with shapefile.Reader(shp="shapefiles/blockgroups.shp") as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 def test_reader_filelike_dbf_only(): @@ -547,7 +547,7 @@ def test_reader_filelike_shp_shx_only(): with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb"), shx=open("shapefiles/blockgroups.shx", "rb")) as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 def test_reader_filelike_shp_dbf_only(): @@ -559,7 +559,7 @@ def test_reader_filelike_shp_dbf_only(): with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb"), dbf=open("shapefiles/blockgroups.dbf", "rb")) as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 record = sf.record(3) assert record[1:3] == ['060750601001', 
4715] @@ -573,7 +573,7 @@ def test_reader_filelike_shp_only(): with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb")) as sf: assert len(sf) == 663 shape = sf.shape(3) - assert len(shape.points) is 173 + assert len(shape.points) == 173 def test_reader_shapefile_delayed_load(): @@ -1101,7 +1101,7 @@ def test_shaperecord_shape(): shaperec = sf.shapeRecord(3) shape = shaperec.shape point = shape.points[0] - assert len(point) is 2 + assert len(point) == 2 def test_shaperecord_record(): From 68a7a47980243b21935d7f209c39786e551f1a1b Mon Sep 17 00:00:00 2001 From: lgolston <30876419+lgolston@users.noreply.github.com> Date: Thu, 24 Aug 2023 15:47:57 -0500 Subject: [PATCH 009/115] add bbox None check --- shapefile.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/shapefile.py b/shapefile.py index 0bf8c5f6..325f284c 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1360,12 +1360,13 @@ def __shape(self, oid=None, bbox=None): # Read a single point if shapeType in (1,11,21): record.points = [_Array('d', unpack("<2d", f.read(16)))] - # create bounding box for Point by duplicating coordinates - point_bbox = list(record.points[0] + record.points[0]) - # skip shape if no overlap with bounding box - if not bbox_overlap(bbox, point_bbox): - f.seek(next) - return None + if bbox is not None: + # create bounding box for Point by duplicating coordinates + point_bbox = list(record.points[0] + record.points[0]) + # skip shape if no overlap with bounding box + if not bbox_overlap(bbox, point_bbox): + f.seek(next) + return None # Read a single Z value if shapeType == 11: record.z = list(unpack(" Date: Tue, 16 Jan 2024 14:51:21 +0000 Subject: [PATCH 010/115] Reformat code block as markdown literal code, so doctest skips it. --- README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index dce792cc..809dce80 100644 --- a/README.md +++ b/README.md @@ -1082,13 +1082,15 @@ A .prj file, or projection file, is a simple text file that stores a shapefile's If you're using the same projection over and over, the following is a simple way to create the .prj file assuming your base filename is stored in a variable called "filename": - >>> with open("{}.prj".format(filename), "w") as prj: - >>> wkt = 'GEOGCS["WGS 84",' - >>> wkt += 'DATUM["WGS_1984",' - >>> wkt += 'SPHEROID["WGS 84",6378137,298.257223563]]' - >>> wkt += ',PRIMEM["Greenwich",0],' - >>> wkt += 'UNIT["degree",0.0174532925199433]]' - >>> prj.write(wkt) +``` + with open("{}.prj".format(filename), "w") as prj: + wkt = 'GEOGCS["WGS 84",' + wkt += 'DATUM["WGS_1984",' + wkt += 'SPHEROID["WGS 84",6378137,298.257223563]]' + wkt += ',PRIMEM["Greenwich",0],' + wkt += 'UNIT["degree",0.0174532925199433]]' + prj.write(wkt) +``` If you need to dynamically fetch WKT projection strings, you can use the pure Python [PyCRS](https://github.com/karimbahgat/PyCRS) module which has a number of useful features. From 603b4755749f3ce2f1e95e7baeda013731fe97eb Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 16:10:29 +0000 Subject: [PATCH 011/115] Drop actions/setup-python to keep Python 2 support. Run action in containers. 
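[PATCH 007] and [PATCH 009] above make the `bbox` filter work for single-point shapes and skip the test entirely when no filter is given. In use, and with the kind of interval-overlap test a helper like `bbox_overlap` presumably performs (the helper body below is an assumption, not copied from shapefile.py; the bbox values come from the test suite):

```
import shapefile

def bboxes_overlap(a, b):
    # axis-aligned boxes [xmin, ymin, xmax, ymax] overlap iff
    # their x intervals overlap and their y intervals overlap
    return a[0] <= b[2] and b[0] <= a[2] and a[1] <= b[3] and b[1] <= a[3]

bbox = [-122.4, 37.8, -122.35, 37.82]   # xmin, ymin, xmax, ymax
with shapefile.Reader("shapefiles/blockgroups") as sf:
    # only shapes whose bounding box overlaps the filter are yielded;
    # everything else is skipped without decoding its full geometry
    hits = list(sf.iterShapes(bbox=bbox))
    print(len(hits))
```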
--- .github/workflows/build.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 26e1159c..541d51ab 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,18 +12,17 @@ on: jobs: build: - runs-on: ubuntu-latest strategy: fail-fast: false matrix: - python-version: ["2.7", "3.5", "3.6", "3.7", "3.8", "3.9"] + python-version: ["2.7.18", "3.6.15", "3.7.17", "3.8.18", "3.9.18"] + + runs-on: ubuntu-latest + container: + image: python:${{ matrix.python-version }}-slim steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} + - uses: actions/checkout@v3 - name: Install dependencies run: | python -m pip install --upgrade pip From 5edd0335e474fb8d8d594cda1ddd22e7eb86cd5a Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 16:54:11 +0000 Subject: [PATCH 012/115] Trigger CI --- .vscode/settings.json | 5 +++++ README.md | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..fe3b5f52 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "yaml.schemas": { + "https://json.schemastore.org/github-workflow.json": "file:///c%3A/Users/drjam/Coding/repos/IronPyShp/.github/workflows/deploy.yml" + } +} \ No newline at end of file diff --git a/README.md b/README.md index 809dce80..5ab26c2a 100644 --- a/README.md +++ b/README.md @@ -1097,7 +1097,7 @@ If you need to dynamically fetch WKT projection strings, you can use the pure Py # Advanced Use ## Common Errors and Fixes - + Below we list some commonly encountered errors and ways to fix them. 
### Warnings and Logging From 043e8d7b692eeb0a97e5dcb926dfb0e7ee18140f Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 16:57:30 +0000 Subject: [PATCH 013/115] Update build.yml --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 541d51ab..92566388 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,9 +5,9 @@ name: build on: push: - branches: [ master ] + branches: [ PyShp-fix-CI ] pull_request: - branches: [ master ] + branches: [ PyShp-fix-CI ] jobs: build: From 9f5f0824d0449e251ef2c9efb697a8d3d3f924e7 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:08:20 +0000 Subject: [PATCH 014/115] Target "master" branch for PR into PyShp --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 92566388..541d51ab 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,9 +5,9 @@ name: build on: push: - branches: [ PyShp-fix-CI ] + branches: [ master ] pull_request: - branches: [ PyShp-fix-CI ] + branches: [ master ] jobs: build: From e796e9250066e2322190bae88aac80a83cb30f53 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:12:53 +0000 Subject: [PATCH 015/115] Don't track VS Code settings --- .gitignore | 1 + .vscode/settings.json | 5 ----- 2 files changed, 1 insertion(+), 5 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore index a82da866..03827eb3 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,4 @@ build/ dist/ *.egg-info/ *.py[cod] +.vscode diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index fe3b5f52..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "yaml.schemas": { - "https://json.schemastore.org/github-workflow.json": "file:///c%3A/Users/drjam/Coding/repos/IronPyShp/.github/workflows/deploy.yml" - } -} \ No newline at end of file From 588617b17cc65815d84f69ebdbe438dfcb4ae5be Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:19:48 +0000 Subject: [PATCH 016/115] Trigger CI, test tests on , Python:3.5.10-slim --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 541d51ab..dc8cf8a3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["2.7.18", "3.6.15", "3.7.17", "3.8.18", "3.9.18"] + python-version: ["2.7.18", , "3.5.10", "3.6.15", "3.7.17", "3.8.18", "3.9.18"] runs-on: ubuntu-latest container: From 2e7b9fb1a6191fca494ce06b5592858a320ab8ec Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:20:55 +0000 Subject: [PATCH 017/115] Remove extra comma --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dc8cf8a3..0e69616c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["2.7.18", 
, "3.5.10", "3.6.15", "3.7.17", "3.8.18", "3.9.18"] + python-version: ["2.7.18", "3.5.10", "3.6.15", "3.7.17", "3.8.18", "3.9.18"] runs-on: ubuntu-latest container: From 5893530349bc67b5521b41618f901f2d90020f9b Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:22:45 +0000 Subject: [PATCH 018/115] Trigger CI --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5ab26c2a..809dce80 100644 --- a/README.md +++ b/README.md @@ -1097,7 +1097,7 @@ If you need to dynamically fetch WKT projection strings, you can use the pure Py # Advanced Use ## Common Errors and Fixes - + Below we list some commonly encountered errors and ways to fix them. ### Warnings and Logging From 543a3d305e583b9516b4daa427bd052aea88430b Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:24:44 +0000 Subject: [PATCH 019/115] Target PyShp-fix-CI --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0e69616c..f8d8bb5e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,9 +5,9 @@ name: build on: push: - branches: [ master ] + branches: [ PyShp-fix-CI ] pull_request: - branches: [ master ] + branches: [ PyShp-fix-CI ] jobs: build: From 77322dabdcb136204f4a758b56864b0d3479622f Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:26:56 +0000 Subject: [PATCH 020/115] Target master for PR into PyShp --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f8d8bb5e..0e69616c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,9 +5,9 @@ name: build on: push: - branches: [ PyShp-fix-CI ] + branches: [ master ] pull_request: - branches: [ PyShp-fix-CI ] + branches: [ master ] jobs: build: From 4b56c1ccc81d03cc789c1263f0cb63fd455ecb89 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:09:33 +0000 Subject: [PATCH 021/115] Add python 3.10, ..., 3.13 --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0e69616c..24676fa0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["2.7.18", "3.5.10", "3.6.15", "3.7.17", "3.8.18", "3.9.18"] + python-version: ["2.7.18", "3.5.10", "3.6.15", "3.7.17", "3.8.18", "3.9.18", "3.10.13", "3.11.7", "3.12.1", "3.13.0a2"] runs-on: ubuntu-latest container: From 87fca4ec768f4790ce5229622bc6e46fac895b47 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:16:26 +0000 Subject: [PATCH 022/115] Trigger CI --- requirements.test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.test.txt b/requirements.test.txt index b3eaa8c8..731c5b93 100644 --- a/requirements.test.txt +++ b/requirements.test.txt @@ -1,2 +1,2 @@ -pytest==3.2.5 +pytest==3.2.5 setuptools From 2cce58574d3954bcff473e6785d03babad1b2321 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:17:31 +0000 
Subject: [PATCH 023/115] Target PyShp-CI-test-on-Python-3.10+ --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 24676fa0..311d6e74 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,9 +5,9 @@ name: build on: push: - branches: [ master ] + branches: [ PyShp-CI-test-on-Python-3.10+ ] pull_request: - branches: [ master ] + branches: [ PyShp-CI-test-on-Python-3.10+ ] jobs: build: From 4217a3a6032ded5c58aab4f884c1940be7b24fd3 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:19:48 +0000 Subject: [PATCH 024/115] Try quoting the branch name --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 311d6e74..bc052caa 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,9 +5,9 @@ name: build on: push: - branches: [ PyShp-CI-test-on-Python-3.10+ ] + branches: [ "PyShp-CI-test-on-Python-3.10+" ] pull_request: - branches: [ PyShp-CI-test-on-Python-3.10+ ] + branches: [ "PyShp-CI-test-on-Python-3.10+" ] jobs: build: From b0de71bd9f22ba1a68118abd209f81c7a8efd13b Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:22:55 +0000 Subject: [PATCH 025/115] Update build.yml --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bc052caa..dc5acb70 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,9 +5,9 @@ name: build on: push: - branches: [ "PyShp-CI-test-on-Python-3.10+" ] + branches: [ "PyShp-test-on-Pythons-3.10-3.13" ] pull_request: - branches: [ "PyShp-CI-test-on-Python-3.10+" ] + branches: [ "PyShp-test-on-Pythons-3.10-3.13" ] jobs: build: From c4fd601195a3e7829282ccd60429d2342015a0a3 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:25:38 +0000 Subject: [PATCH 026/115] Relax Pytest pinned version --- requirements.test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.test.txt b/requirements.test.txt index 731c5b93..27472efe 100644 --- a/requirements.test.txt +++ b/requirements.test.txt @@ -1,2 +1,2 @@ -pytest==3.2.5 +pytest setuptools From d3c58348d0265ed7f36343fce019b8e13a98f9da Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:27:14 +0000 Subject: [PATCH 027/115] Target master for PR for PyShp --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dc5acb70..24676fa0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,9 +5,9 @@ name: build on: push: - branches: [ "PyShp-test-on-Pythons-3.10-3.13" ] + branches: [ master ] pull_request: - branches: [ "PyShp-test-on-Pythons-3.10-3.13" ] + branches: [ master ] jobs: build: From 29bfda3087582a08e48677f7e8e89a7ac3dc5513 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 17 Jul 2024 10:10:54 +0100 Subject: [PATCH 028/115] Run on workflow_dispatch --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml 
b/.github/workflows/build.yml index 24676fa0..1f46613f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,6 +8,7 @@ on: branches: [ master ] pull_request: branches: [ master ] + workflow_dispatch: jobs: build: From 2591b380c20ff213c76945f7542bdb4d05fb9f06 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 17 Jul 2024 12:59:30 +0100 Subject: [PATCH 029/115] Swap dead link to one in a new repo we control (fix doctest). --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 71d42737..c3b1bb9b 100644 --- a/README.md +++ b/README.md @@ -299,7 +299,7 @@ Finally, you can use all of the above methods to read shapefiles directly from t >>> # from a zipped shapefile on website - >>> sf = shapefile.Reader("https://biogeo.ucdavis.edu/data/diva/rrd/NIC_rrd.zip") + >>> sf = shapefile.Reader("https://github.com/JamesParrott/PyShp_test_shapefile/raw/main/gis_osm_natural_a_free_1.zip") >>> # from a shapefile collection of files in a github repository >>> sf = shapefile.Reader("https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true") From daab62f87fb5578fbd9aaf487d59a135212cad40 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 17 Jul 2024 13:00:30 +0100 Subject: [PATCH 030/115] Update test_shapefile.py --- test_shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_shapefile.py b/test_shapefile.py index 15bc42f5..774e59cd 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -275,7 +275,7 @@ def test_reader_url(): pass # test reading zipfile from url - url = "https://biogeo.ucdavis.edu/data/diva/rrd/NIC_rrd.zip" + url = "https://github.com/JamesParrott/PyShp_test_shapefile/raw/main/gis_osm_natural_a_free_1.zip" with shapefile.Reader(url) as sf: for recShape in sf.iterShapeRecords(): pass From 2463dd9be222e186c80ee500674989af8ed99f65 Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Wed, 28 Aug 2024 06:39:16 -0700 Subject: [PATCH 031/115] build.yml: Allow all pushes to run the workflow. This allows contributors to see how the tests are doing in their forks. --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1f46613f..b1b885cf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,7 +5,6 @@ name: build on: push: - branches: [ master ] pull_request: branches: [ master ] workflow_dispatch: From 20a39e46e2e6557bf4709e1b87d453df73760cb1 Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Wed, 28 Aug 2024 06:49:34 -0700 Subject: [PATCH 032/115] *.py: Remove trailing empty spaces. 
Before: ``` file *.py setup.py: Python script, ASCII text executable shapefile.py: Python script, ASCII text executable, with CRLF line terminators test_shapefile.py: Python script, ASCII text executable ``` Do the strip: ``` perl -pi -e 's/\s+\n/\n/g' *.py ``` After: ``` file *.py setup.py: Python script, ASCII text executable shapefile.py: Python script, ASCII text executable test_shapefile.py: Python script, ASCII text executable ``` Move `shapefile.py` back to dos formatting: ``` unix2dos shapefile.py ``` --- shapefile.py | 196 +++++++++++++++++++++++----------------------- test_shapefile.py | 90 ++++++++++----------- 2 files changed, 143 insertions(+), 143 deletions(-) diff --git a/shapefile.py b/shapefile.py index 3b4e5295..26d93dc6 100644 --- a/shapefile.py +++ b/shapefile.py @@ -85,7 +85,7 @@ from urllib.parse import urlparse, urlunparse from urllib.error import HTTPError from urllib.request import urlopen, Request - + else: from itertools import izip @@ -97,7 +97,7 @@ # Helpers MISSING = [None,''] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. +NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. if PYTHON3: def b(v, encoding='utf-8', encodingErrors='strict'): @@ -207,7 +207,7 @@ def signed_area(coords, fast=False): def is_cw(coords): """Returns True if a polygon ring has clockwise orientation, determined - by a negatively signed area. + by a negatively signed area. """ area2 = signed_area(coords, fast=True) return area2 < 0 @@ -245,7 +245,7 @@ def ring_contains_point(coords, p): Adapted from code by Eric Haynes http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c - + Original description: Shoot a test ray along +X axis. The strategy, from MacMartin, is to compare vertex Y values to the testing point's Y and quickly discard @@ -258,11 +258,11 @@ def ring_contains_point(coords, p): yflag0 = ( vtx0[1] >= ty ) inside_flag = False - for vtx1 in coords[1:]: + for vtx1 in coords[1:]: yflag1 = ( vtx1[1] >= ty ) # check if endpoints straddle (are on opposite sides) of X axis # (i.e. the Y's differ); if so, +X ray could intersect this edge. - if yflag0 != yflag1: + if yflag0 != yflag1: xflag0 = ( vtx0[0] >= tx ) # check if endpoints are on same side of the Y axis (i.e. X's # are the same); if so, it's easy to test if edge hits or misses. @@ -287,7 +287,7 @@ def ring_sample(coords, ccw=False): finding the first centroid of a coordinate triplet whose orientation matches the orientation of the ring and passes the point-in-ring test. The orientation of the ring is assumed to be clockwise, unless ccw - (counter-clockwise) is set to True. + (counter-clockwise) is set to True. 
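A Python rendering of the `perl -pi -e 's/\s+\n/\n/g' *.py` one-liner quoted in the commit message above, working line by line like perl's `-p` loop. Note that `\s` matches `\r` as well, so CRLF endings collapse to LF — which is exactly why the message runs `unix2dos shapefile.py` afterwards (a sketch; it edits the files in place):

```
import glob
import re

for path in glob.glob("*.py"):
    with open(path, "rb") as f:
        lines = f.readlines()
    with open(path, "wb") as f:
        for line in lines:
            # s/\s+\n/\n/g per line: strips trailing blanks, tabs and \r
            f.write(re.sub(br"\s+\n", b"\n", line))
```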
""" triplet = [] def itercoords(): @@ -296,12 +296,12 @@ def itercoords(): yield p # finally, yield the second coordinate to the end to allow checking the last triplet yield coords[1] - - for p in itercoords(): + + for p in itercoords(): # add point to triplet (but not if duplicate) if p not in triplet: triplet.append(p) - + # new triplet, try to get sample if len(triplet) == 3: # check that triplet does not form a straight line (not a triangle) @@ -322,7 +322,7 @@ def itercoords(): # failed to get sample point from this triplet # remove oldest triplet coord to allow iterating to next triplet triplet.pop(0) - + else: raise Exception('Unexpected error: Unable to find a ring sample point.') @@ -334,14 +334,14 @@ def ring_contains_ring(coords1, coords2): def organize_polygon_rings(rings, return_errors=None): '''Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior - ring, and one or more interior holes. If a return_errors dict is provided (optional), - any errors encountered will be added to it. + ring, and one or more interior holes. If a return_errors dict is provided (optional), + any errors encountered will be added to it. Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). Rings are determined as exteriors if they run in clockwise direction, or interior holes if they run in counter-clockwise direction. This method is used to construct GeoJSON (multi)polygons from the shapefile polygon shape type, which does not - explicitly store the structure of the polygons beyond exterior/interior ring orientation. + explicitly store the structure of the polygons beyond exterior/interior ring orientation. ''' # first iterate rings and classify as exterior or hole exteriors = [] @@ -355,7 +355,7 @@ def organize_polygon_rings(rings, return_errors=None): else: # ring is a hole holes.append(ring) - + # if only one exterior, then all holes belong to that exterior if len(exteriors) == 1: # exit early @@ -374,7 +374,7 @@ def organize_polygon_rings(rings, return_errors=None): poly = [ext] polys.append(poly) return polys - + # first determine each hole's candidate exteriors based on simple bbox contains test hole_exteriors = dict([(hole_i,[]) for hole_i in xrange(len(holes))]) exterior_bboxes = [ring_bbox(ring) for ring in exteriors] @@ -386,7 +386,7 @@ def organize_polygon_rings(rings, return_errors=None): # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test for hole_i,exterior_candidates in hole_exteriors.items(): - + if len(exterior_candidates) > 1: # get hole sample point ccw = not is_cw(holes[hole_i]) @@ -404,7 +404,7 @@ def organize_polygon_rings(rings, return_errors=None): # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole for hole_i,exterior_candidates in hole_exteriors.items(): - + if len(exterior_candidates) > 1: # exterior candidate with the smallest area is the hole's most immediate parent ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x], fast=True)))[0] @@ -463,17 +463,17 @@ def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid= geometry record then those shapes are called parts. Parts are designated by their starting index in geometry record's list of shapes. For MultiPatch geometry, partTypes designates - the patch type of each of the parts. + the patch type of each of the parts. 
""" self.shapeType = shapeType self.points = points or [] self.parts = parts or [] if partTypes: self.partTypes = partTypes - + # and a dict to silently record any errors encountered self._errors = {} - + # add oid if oid is not None: self.__oid = oid @@ -557,12 +557,12 @@ def __geo_interface__(self): rings.append(ring) # organize rings into list of polygons, where each polygon is defined as list of rings. - # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). + # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). polys = organize_polygon_rings(rings, self._errors) - + # if VERBOSE is True, issue detailed warning about any shape errors # encountered during the Shapefile to GeoJSON conversion - if VERBOSE and self._errors: + if VERBOSE and self._errors: header = 'Possible issue encountered when converting Shape #{} to GeoJSON: '.format(self.oid) orphans = self._errors.get('polygon_orphaned_holes', None) if orphans: @@ -616,7 +616,7 @@ def _from_geojson(geoj): else: raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) shape.shapeType = shapeType - + # set points and parts if geojType == "Point": shape.points = [ geoj["coordinates"] ] @@ -629,9 +629,9 @@ def _from_geojson(geoj): parts = [] index = 0 for i,ext_or_hole in enumerate(geoj["coordinates"]): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. if i == 0 and not is_cw(ext_or_hole): # flip exterior direction ext_or_hole = rewind(ext_or_hole) @@ -659,9 +659,9 @@ def _from_geojson(geoj): index = 0 for polygon in geoj["coordinates"]: for i,ext_or_hole in enumerate(polygon): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. if i == 0 and not is_cw(ext_or_hole): # flip exterior direction ext_or_hole = rewind(ext_or_hole) @@ -726,7 +726,7 @@ def __getattr__(self, item): :param item: The field name, used as attribute :return: Value of the field :raises: AttributeError, if item is not a field of the shapefile - and IndexError, if the field exists but the field's + and IndexError, if the field exists but the field's corresponding value in the Record does not exist """ try: @@ -823,8 +823,8 @@ def __dir__(self): """ default = list(dir(type(self))) # default list methods and attributes of this class fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) - return default + fnames - + return default + fnames + class ShapeRecord(object): """A ShapeRecord object containing a shape along with its attributes. Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" @@ -919,8 +919,8 @@ class Reader(object): but is not required to read the geometry from the .shp file. The "shapefile" argument in the constructor is the name of the file you want to open, and can be the path - to a shapefile on a local filesystem, inside a zipfile, - or a url. 
+ to a shapefile on a local filesystem, inside a zipfile, + or a url. You can instantiate a Reader without specifying a shapefile and then specify one later with the load() method. @@ -1049,7 +1049,7 @@ def __init__(self, *args, **kwargs): # Load and exit early self.load(path) return - + # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) if "shp" in kwargs.keys(): if hasattr(kwargs["shp"], "read"): @@ -1062,7 +1062,7 @@ def __init__(self, *args, **kwargs): else: (baseName, ext) = os.path.splitext(kwargs["shp"]) self.load_shp(baseName) - + if "shx" in kwargs.keys(): if hasattr(kwargs["shx"], "read"): self.shx = kwargs["shx"] @@ -1074,7 +1074,7 @@ def __init__(self, *args, **kwargs): else: (baseName, ext) = os.path.splitext(kwargs["shx"]) self.load_shx(baseName) - + if "dbf" in kwargs.keys(): if hasattr(kwargs["dbf"], "read"): self.dbf = kwargs["dbf"] @@ -1086,7 +1086,7 @@ def __init__(self, *args, **kwargs): else: (baseName, ext) = os.path.splitext(kwargs["dbf"]) self.load_dbf(baseName) - + # Load the files if self.shp or self.dbf: self.load() @@ -1122,9 +1122,9 @@ def __len__(self): # Preferably use dbf record count if self.numRecords is None: self.__dbfHeader() - + return self.numRecords - + elif self.shp: # Otherwise use shape count if self.shx: @@ -1132,7 +1132,7 @@ def __len__(self): self.__shxHeader() return self.numShapes - + else: # Index file not available, iterate all shapes to get total count if self.numShapes is None: @@ -1158,12 +1158,12 @@ def __len__(self): self._offsets = offsets # Return to previous file position shp.seek(checkpoint) - + return self.numShapes - + else: # No file loaded yet, treat as 'empty' shapefile - return 0 + return 0 def __iter__(self): """Iterates through the shapes/records in the shapefile.""" @@ -1385,7 +1385,7 @@ def __shape(self, oid=None, bbox=None): record.m = [None] # Seek to the end of this record as defined by the record header because # the shapefile spec doesn't require the actual content to meet the header - # definition. Probably allowed for lazy feature deletion. + # definition. Probably allowed for lazy feature deletion. f.seek(next) return record @@ -1427,8 +1427,8 @@ def __shapeIndex(self, i=None): def shape(self, i=0, bbox=None): """Returns a shape object for a shape in the geometry record file. - If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. """ shp = self.__getFileObj(self.shp) i = self.__restrictIndex(i) @@ -1464,7 +1464,7 @@ def shape(self, i=0, bbox=None): def shapes(self, bbox=None): """Returns all shapes in a shapefile. To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ shapes = Shapes() shapes.extend(self.iterShapes(bbox=bbox)) @@ -1474,7 +1474,7 @@ def iterShapes(self, bbox=None): """Returns a generator of shapes in a shapefile. Useful for handling large shapefiles. To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. 
""" shp = self.__getFileObj(self.shp) # Found shapefiles which report incorrect @@ -1488,7 +1488,7 @@ def iterShapes(self, bbox=None): if self.numShapes: # Iterate exactly the number of shapes from shx header for i in xrange(self.numShapes): - # MAYBE: check if more left of file or exit early? + # MAYBE: check if more left of file or exit early? shape = self.__shape(oid=i, bbox=bbox) if shape: yield shape @@ -1509,7 +1509,7 @@ def iterShapes(self, bbox=None): # Entire shp file consumed # Update the number of shapes and list of offsets assert i == len(offsets) - self.numShapes = i + self.numShapes = i self._offsets = offsets def __dbfHeader(self): @@ -1539,7 +1539,7 @@ def __dbfHeader(self): terminator = dbf.read(1) if terminator != b"\r": raise ShapefileException("Shapefile dbf header lacks expected terminator. (likely corrupt?)") - + # insert deletion field at start self.fields.insert(0, ('DeletionFlag', 'C', 1, 0)) @@ -1555,9 +1555,9 @@ def __dbfHeader(self): self.__fullRecLookup = recLookup def __recordFmt(self, fields=None): - """Calculates the format and size of a .dbf record. Optional 'fields' arg + """Calculates the format and size of a .dbf record. Optional 'fields' arg specifies which fieldnames to unpack and which to ignore. Note that this - always includes the DeletionFlag at index 0, regardless of the 'fields' arg. + always includes the DeletionFlag at index 0, regardless of the 'fields' arg. """ if self.numRecords is None: self.__dbfHeader() @@ -1565,7 +1565,7 @@ def __recordFmt(self, fields=None): for fieldinfo in self.fields] if fields is not None: # only unpack specified fields, ignore others using padbytes (x) - structcodes = [code if fieldinfo[0] in fields + structcodes = [code if fieldinfo[0] in fields or fieldinfo[0] == 'DeletionFlag' # always unpack delflag else '%dx' % fieldinfo[2] for fieldinfo,code in zip(self.fields, structcodes)] @@ -1580,10 +1580,10 @@ def __recordFmt(self, fields=None): def __recordFields(self, fields=None): """Returns the necessary info required to unpack a record's fields, - restricted to a subset of fieldnames 'fields' if specified. - Returns a list of field info tuples, a name-index lookup dict, + restricted to a subset of fieldnames 'fields' if specified. + Returns a list of field info tuples, a name-index lookup dict, and a Struct instance for unpacking these fields. Note that DeletionFlag - is not a valid field. + is not a valid field. """ if fields is not None: # restrict info to the specified fields @@ -1613,13 +1613,13 @@ def __recordFields(self, fields=None): def __record(self, fieldTuples, recLookup, recStruct, oid=None): """Reads and returns a dbf record row as a list of values. Requires specifying - a list of field info tuples 'fieldTuples', a record name-index dict 'recLookup', - and a Struct instance 'recStruct' for unpacking these fields. + a list of field info tuples 'fieldTuples', a record name-index dict 'recLookup', + and a Struct instance 'recStruct' for unpacking these fields. """ f = self.__getFileObj(self.dbf) recordContents = recStruct.unpack(f.read(recStruct.size)) - + # deletion flag field is always unpacked as first value (see __recordFmt) if recordContents[0] != b' ': # deleted record @@ -1637,7 +1637,7 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): record = [] for (name, typ, size, deci),value in zip(fieldTuples, recordContents): if typ in ("N","F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. 
+ # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. value = value.split(b'\0')[0] value = value.replace(b'*', b'') # QGIS NULL is all '*' chars if value == b'': @@ -1654,7 +1654,7 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): # first try to force directly to int. # forcing a large int to float and back to int # will lose information and result in wrong nr. - value = int(value) + value = int(value) except ValueError: # forcing directly to int failed, so was probably a float. try: @@ -1698,7 +1698,7 @@ def __record(self, fieldTuples, recLookup, recStruct, oid=None): def record(self, i=0, fields=None): """Returns a specific dbf record based on the supplied index. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. + list of one or more fieldnames. """ f = self.__getFileObj(self.dbf) if self.numRecords is None: @@ -1711,7 +1711,7 @@ def record(self, i=0, fields=None): return self.__record(oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct) def records(self, fields=None): - """Returns all records in a dbf file. + """Returns all records in a dbf file. To only read some of the fields, specify the 'fields' arg as a list of one or more fieldnames. """ @@ -1745,11 +1745,11 @@ def iterRecords(self, fields=None): def shapeRecord(self, i=0, fields=None, bbox=None): """Returns a combination geometry and attribute record for the - supplied record index. + supplied record index. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. - If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. + list of one or more fieldnames. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. """ i = self.__restrictIndex(i) shape = self.shape(i, bbox=bbox) @@ -1761,9 +1761,9 @@ def shapeRecords(self, fields=None, bbox=None): """Returns a list of combination geometry/attribute records for all records in a shapefile. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. + list of one or more fieldnames. To only read entries within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ return ShapeRecords(self.iterShapeRecords(fields=fields, bbox=bbox)) @@ -1771,9 +1771,9 @@ def iterShapeRecords(self, fields=None, bbox=None): """Returns a generator of combination geometry/attribute records for all records in a shapefile. To only read some of the fields, specify the 'fields' arg as a - list of one or more fieldnames. + list of one or more fieldnames. To only read entries within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. + arg as a list or tuple of xmin,ymin,xmax,ymax. """ if bbox is None: # iterate through all shapes and records @@ -1782,13 +1782,13 @@ def iterShapeRecords(self, fields=None, bbox=None): else: # only iterate where shape.bbox overlaps with the given bbox # TODO: internal __record method should be faster but would have to - # make sure to seek to correct file location... + # make sure to seek to correct file location... 
#fieldTuples,recLookup,recStruct = self.__recordFields(fields) for shape in self.iterShapes(bbox=bbox): if shape: #record = self.__record(oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct) - record = self.record(i=shape.oid, fields=fields) + record = self.record(i=shape.oid, fields=fields) yield ShapeRecord(shape=shape, record=record) @@ -1819,8 +1819,8 @@ def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): else: raise Exception('Either the target filepath, or any of shp, shx, or dbf must be set to create a shapefile.') # Initiate with empty headers, to be finalized upon closing - if self.shp: self.shp.write(b'9'*100) - if self.shx: self.shx.write(b'9'*100) + if self.shp: self.shp.write(b'9'*100) + if self.shx: self.shx.write(b'9'*100) # Geometry record offsets and lengths for writing shx file. self.recNum = 0 self.shpNum = 0 @@ -1828,16 +1828,16 @@ def __init__(self, target=None, shapeType=None, autoBalance=False, **kwargs): self._zbox = None self._mbox = None # Use deletion flags in dbf? Default is false (0). Note: Currently has no effect, records should NOT contain deletion flags. - self.deletionFlag = 0 + self.deletionFlag = 0 # Encoding self.encoding = kwargs.pop('encoding', 'utf-8') self.encodingErrors = kwargs.pop('encodingErrors', 'strict') def __len__(self): - """Returns the current number of features written to the shapefile. + """Returns the current number of features written to the shapefile. If shapes and records are unbalanced, the length is considered the highest of the two.""" - return max(self.recNum, self.shpNum) + return max(self.recNum, self.shpNum) def __enter__(self): """ @@ -1862,7 +1862,7 @@ def close(self): shp_open = self.shp and not (hasattr(self.shp, 'closed') and self.shp.closed) shx_open = self.shx and not (hasattr(self.shx, 'closed') and self.shx.closed) dbf_open = self.dbf and not (hasattr(self.dbf, 'closed') and self.dbf.closed) - + # Balance if already not balanced if self.shp and shp_open and self.dbf and dbf_open: if self.autoBalance: @@ -1934,8 +1934,8 @@ def __bbox(self, s): y.extend(py) else: # this should not happen. - # any shape that is not null should have at least one point, and only those should be sent here. - # could also mean that earlier code failed to add points to a non-null shape. + # any shape that is not null should have at least one point, and only those should be sent here. + # could also mean that earlier code failed to add points to a non-null shape. raise Exception("Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." % s.shapeType) bbox = [min(x), min(y), max(x), max(y)] # update global @@ -2035,7 +2035,7 @@ def __shapefileHeader(self, fileObj, headerType='shp'): # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. - bbox = [0,0,0,0] + bbox = [0,0,0,0] f.write(pack("<4d", *bbox)) except error: raise ShapefileException("Failed to write shapefile bounding box. 
Floats required.") @@ -2179,7 +2179,7 @@ def __shpRecord(self, s): f.write(pack("<%sd" % len(s.z), *s.z)) else: # if z values are stored as 3rd dimension - [f.write(pack(" 2 else 0)) for p in s.points] + [f.write(pack(" 2 else 0)) for p in s.points] except error: raise ShapefileException("Failed to write elevation values for record %s. Expected floats." % self.shpNum) # Write m extremes and values @@ -2191,7 +2191,7 @@ def __shpRecord(self, s): except error: raise ShapefileException("Failed to write measure extremes for record %s. Expected floats" % self.shpNum) try: - if hasattr(s,"m"): + if hasattr(s,"m"): # if m values are stored in attribute f.write(pack("<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m])) else: @@ -2239,7 +2239,7 @@ def __shpRecord(self, s): # if m values are stored in attribute try: if not s.m or s.m[0] is None: - s.m = (NODATA,) + s.m = (NODATA,) f.write(pack("<1d", s.m[0])) except error: raise ShapefileException("Failed to write measure value for record %s. Expected floats." % self.shpNum) @@ -2284,7 +2284,7 @@ def record(self, *recordList, **recordDict): # Balance if already not balanced if self.autoBalance and self.recNum > self.shpNum: self.balance() - + fieldCount = sum((1 for field in self.fields if field[0] != 'DeletionFlag')) if recordList: record = list(recordList) @@ -2322,7 +2322,7 @@ def __dbfRecord(self, record): self.recNum += 1 fields = (field for field in self.fields if field[0] != 'DeletionFlag') # ignore deletionflag field in case it was specified for (fieldName, fieldType, size, deci), value in zip(fields, record): - # write + # write fieldType = fieldType.upper() size = int(size) if fieldType in ("N","F"): @@ -2335,7 +2335,7 @@ def __dbfRecord(self, record): # first try to force directly to int. # forcing a large int to float and back to int # will lose information and result in wrong nr. - value = int(value) + value = int(value) except ValueError: # forcing directly to int failed, so was probably a float. value = int(float(value)) @@ -2415,7 +2415,7 @@ def pointz(self, x, y, z=0, m=None): pointShape = Shape(shapeType) pointShape.points.append([x, y, z, m]) self.shape(pointShape) - + def multipoint(self, points): """Creates a MULTIPOINT shape. @@ -2600,8 +2600,8 @@ def field(self, name, fieldType="C", size="50", decimal=0): ## be written exclusively using saveShp, saveShx, and saveDbf respectively. ## If target is specified but not shp, shx, or dbf then the target path and ## file name are used. If no options or specified, a unique base file name -## is generated to save the files and the base file name is returned as a -## string. +## is generated to save the files and the base file name is returned as a +## string. ## """ ## # Balance if already not balanced ## if shp and dbf: @@ -2624,7 +2624,7 @@ def field(self, name, fieldType="C", size="50", decimal=0): ## if not target: ## temp = tempfile.NamedTemporaryFile(prefix="shapefile_",dir=os.getcwd()) ## target = temp.name -## generated = True +## generated = True ## self.saveShp(target) ## self.shp.close() ## self.saveShx(target) @@ -2670,11 +2670,11 @@ def summarize(self): runner.summarize(verbosity) return failure_count - + if __name__ == "__main__": """ Doctests are contained in the file 'README.md', and are tested using the built-in - testing libraries. + testing libraries. 
""" failure_count = test() sys.exit(failure_count) diff --git a/test_shapefile.py b/test_shapefile.py index 774e59cd..ec73b457 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -293,7 +293,7 @@ def test_reader_zip(): pass assert len(sf) > 0 assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True - + # test require specific path when reading multi-shapefile zipfile with pytest.raises(shapefile.ShapefileException): with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip") as sf: @@ -584,7 +584,7 @@ def test_reader_shapefile_delayed_load(): with shapefile.Reader() as sf: # assert that data request raises exception, since no file has been provided yet with pytest.raises(shapefile.ShapefileException): - sf.shape(0) + sf.shape(0) # assert that works after loading file manually sf.load("shapefiles/blockgroups") assert len(sf) == 663 @@ -603,7 +603,7 @@ def test_records_match_shapes(): def test_record_attributes(fields=None): """ - Assert that record retrieves all relevant values and can + Assert that record retrieves all relevant values and can be accessed as attributes and dictionary items. """ # note @@ -634,7 +634,7 @@ def test_record_attributes(fields=None): def test_record_subfields(): """ - Assert that reader correctly retrieves only a subset + Assert that reader correctly retrieves only a subset of fields when specified. """ fields = ["AREA","POP1990","MALES","FEMALES","MOBILEHOME"] @@ -643,9 +643,9 @@ def test_record_subfields(): def test_record_subfields_unordered(): """ - Assert that reader correctly retrieves only a subset - of fields when specified, given in random order but - retrieved in the order of the shapefile fields. + Assert that reader correctly retrieves only a subset + of fields when specified, given in random order but + retrieved in the order of the shapefile fields. """ fields = sorted(["AREA","POP1990","MALES","FEMALES","MOBILEHOME"]) test_record_attributes(fields=fields) @@ -663,7 +663,7 @@ def test_record_subfields_delflag_notvalid(): def test_record_subfields_duplicates(): """ Assert that reader correctly retrieves only a subset - of fields when specified, handling duplicate input fields. + of fields when specified, handling duplicate input fields. """ fields = ["AREA","AREA","AREA","MALES","MALES","MOBILEHOME"] test_record_attributes(fields=fields) @@ -676,7 +676,7 @@ def test_record_subfields_duplicates(): def test_record_subfields_empty(): """ Assert that reader does not retrieve any fields when given - an empty list. + an empty list. """ fields = [] test_record_attributes(fields=fields) @@ -774,8 +774,8 @@ def test_shape_oid_no_shx(): def test_reader_offsets(): """ - Assert that reader will not read the shx offsets unless necessary, - i.e. requesting a shape index. + Assert that reader will not read the shx offsets unless necessary, + i.e. requesting a shape index. """ basename = "shapefiles/blockgroups" with shapefile.Reader(basename) as sf: @@ -788,8 +788,8 @@ def test_reader_offsets(): def test_reader_offsets_no_shx(): """ - Assert that reading a shapefile without a shx file will not build - the offsets unless necessary, i.e. reading all the shapes. + Assert that reading a shapefile without a shx file will not build + the offsets unless necessary, i.e. reading all the shapes. """ basename = "shapefiles/blockgroups" shp = open(basename + ".shp", 'rb') @@ -810,7 +810,7 @@ def test_reader_offsets_no_shx(): def test_reader_numshapes(): """ Assert that reader reads the numShapes attribute from the - shx file header during loading. 
+ shx file header during loading. """ basename = "shapefiles/blockgroups" with shapefile.Reader(basename) as sf: @@ -839,8 +839,8 @@ def test_reader_numshapes_no_shx(): def test_reader_len(): """ - Assert that calling len() on reader is equal to length of - all shapes and records. + Assert that calling len() on reader is equal to length of + all shapes and records. """ basename = "shapefiles/blockgroups" with shapefile.Reader(basename) as sf: @@ -850,7 +850,7 @@ def test_reader_len(): def test_reader_len_not_loaded(): """ Assert that calling len() on reader that hasn't loaded a shapefile - yet is equal to 0. + yet is equal to 0. """ with shapefile.Reader() as sf: assert len(sf) == 0 @@ -859,7 +859,7 @@ def test_reader_len_not_loaded(): def test_reader_len_dbf_only(): """ Assert that calling len() on reader when reading a dbf file only, - is equal to length of all records. + is equal to length of all records. """ basename = "shapefiles/blockgroups" dbf = open(basename + ".dbf", 'rb') @@ -870,7 +870,7 @@ def test_reader_len_dbf_only(): def test_reader_len_no_dbf(): """ Assert that calling len() on reader when dbf file is missing, - is equal to length of all shapes. + is equal to length of all shapes. """ basename = "shapefiles/blockgroups" shp = open(basename + ".shp", 'rb') @@ -882,7 +882,7 @@ def test_reader_len_no_dbf(): def test_reader_len_no_dbf_shx(): """ Assert that calling len() on reader when dbf and shx file is missing, - is equal to length of all shapes. + is equal to length of all shapes. """ basename = "shapefiles/blockgroups" shp = open(basename + ".shp", 'rb') @@ -893,7 +893,7 @@ def test_reader_len_no_dbf_shx(): def test_reader_corrupt_files(): """ Assert that reader is able to handle corrupt files by - strictly going off the header information. + strictly going off the header information. """ basename = "shapefiles/test/corrupt_too_long" @@ -931,7 +931,7 @@ def test_reader_corrupt_files(): def test_bboxfilter_shape(): """ Assert that applying the bbox filter to shape() correctly ignores the shape - if it falls outside, and returns it if inside. + if it falls outside, and returns it if inside. """ inside = [-122.4, 37.8, -122.35, 37.82] outside = list(inside) @@ -945,7 +945,7 @@ def test_bboxfilter_shape(): def test_bboxfilter_shapes(): """ Assert that applying the bbox filter to shapes() correctly ignores shapes - that fall outside, and returns those that fall inside. + that fall outside, and returns those that fall inside. """ bbox = [-122.4, 37.8, -122.35, 37.82] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -967,7 +967,7 @@ def test_bboxfilter_shapes(): def test_bboxfilter_shapes_outside(): """ Assert that applying the bbox filter to shapes() correctly returns - no shapes when the bbox is outside the entire shapefile. + no shapes when the bbox is outside the entire shapefile. """ bbox = [-180, 89, -179, 90] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -978,7 +978,7 @@ def test_bboxfilter_shapes_outside(): def test_bboxfilter_itershapes(): """ Assert that applying the bbox filter to iterShapes() correctly ignores shapes - that fall outside, and returns those that fall inside. + that fall outside, and returns those that fall inside. """ bbox = [-122.4, 37.8, -122.35, 37.82] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -1000,7 +1000,7 @@ def test_bboxfilter_itershapes(): def test_bboxfilter_shaperecord(): """ Assert that applying the bbox filter to shapeRecord() correctly ignores the shape - if it falls outside, and returns it if inside. 
+ if it falls outside, and returns it if inside. """ inside = [-122.4, 37.8, -122.35, 37.82] outside = list(inside) @@ -1018,7 +1018,7 @@ def test_bboxfilter_shaperecord(): def test_bboxfilter_shaperecords(): """ Assert that applying the bbox filter to shapeRecords() correctly ignores shapes - that fall outside, and returns those that fall inside. + that fall outside, and returns those that fall inside. """ bbox = [-122.4, 37.8, -122.35, 37.82] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -1046,7 +1046,7 @@ def test_bboxfilter_shaperecords(): def test_bboxfilter_itershaperecords(): """ Assert that applying the bbox filter to iterShapeRecords() correctly ignores shapes - that fall outside, and returns those that fall inside. + that fall outside, and returns those that fall inside. """ bbox = [-122.4, 37.8, -122.35, 37.82] with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -1153,7 +1153,7 @@ def test_write_shp_only(tmpdir): # assert test.shp exists assert os.path.exists(filename+'.shp') - + # test that can read shapes with shapefile.Reader(shp=filename+'.shp') as reader: assert reader.shp and not reader.shx and not reader.dbf @@ -1220,7 +1220,7 @@ def test_write_shp_dbf_only(tmpdir): # assert test.dbf exists assert os.path.exists(filename+'.dbf') - + # test that can read records and shapes with shapefile.Reader(shp=filename+'.shp', dbf=filename+'.dbf') as reader: assert reader.shp and not reader.shx and reader.dbf @@ -1308,7 +1308,7 @@ def test_write_filelike(tmpdir): writer.field('field1', 'C') # required to create a valid dbf file writer.record('value') writer.null() - + # test that filelike objects were written correctly with shapefile.Reader(shp=shp, shx=shx, dbf=dbf) as reader: assert len(reader) == 1 @@ -1431,17 +1431,17 @@ def test_write_shapefile_extension_ignored(tmpdir): def test_write_record(tmpdir): """ Test that .record() correctly writes a record using either a list of *args - or a dict of **kwargs. + or a dict of **kwargs. """ filename = tmpdir.join("test.shp").strpath with shapefile.Writer(filename) as writer: writer.autoBalance = True - writer.field('one', 'C') - writer.field('two', 'C') - writer.field('three', 'C') - writer.field('four', 'C') - + writer.field('one', 'C') + writer.field('two', 'C') + writer.field('three', 'C') + writer.field('four', 'C') + values = ['one','two','three','four'] writer.record(*values) writer.record(*values) @@ -1458,17 +1458,17 @@ def test_write_record(tmpdir): def test_write_partial_record(tmpdir): """ Test that .record() correctly writes a partial record (given only some of the values) - using either a list of *args or a dict of **kwargs. Should fill in the gaps. + using either a list of *args or a dict of **kwargs. Should fill in the gaps. """ filename = tmpdir.join("test.shp").strpath with shapefile.Writer(filename) as writer: writer.autoBalance = True - - writer.field('one', 'C') - writer.field('two', 'C') - writer.field('three', 'C') - writer.field('four', 'C') - + + writer.field('one', 'C') + writer.field('two', 'C') + writer.field('three', 'C') + writer.field('four', 'C') + values = ['one','two'] writer.record(*values) writer.record(*values) @@ -1519,7 +1519,7 @@ def test_write_geojson(tmpdir): @pytest.mark.parametrize("shape_type", shape_types) def test_write_empty_shapefile(tmpdir, shape_type): """ - Assert that can write an empty shapefile, for all different shape types. + Assert that can write an empty shapefile, for all different shape types. 
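The record-writing convention asserted by test_write_record and test_write_partial_record above boils down to the following sketch (the output path and field names are hypothetical; autoBalance pads missing shapes or records, as in the tests):

    >>> import shapefile
    >>> w = shapefile.Writer("shapefiles/test/record_demo")   # hypothetical path
    >>> w.autoBalance = True
    >>> w.field('one', 'C')
    >>> w.field('two', 'C')
    >>> w.record('first', 'second')            # positional: one value per field
    >>> w.record(one='first', two='second')    # keyword: keyed by field name
    >>> w.close()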
""" filename = tmpdir.join("test").strpath with shapefile.Writer(filename, shapeType=shape_type) as w: From 9c4cfba56bf1b9d6cdc8cb5a86d8efabfdad7a4d Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Wed, 28 Aug 2024 06:57:29 -0700 Subject: [PATCH 033/115] changelog.txt: strip trailing spaces and keep as dos formatted file. --- changelog.txt | 94 +++++++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/changelog.txt b/changelog.txt index 1735183f..291a2e81 100644 --- a/changelog.txt +++ b/changelog.txt @@ -9,7 +9,7 @@ VERSION 2.3.0 2022-04-30 New Features: - * Added support for pathlib and path-like shapefile filepaths (@mwtoews). + * Added support for pathlib and path-like shapefile filepaths (@mwtoews). * Allow reading individual file extensions via filepaths. Improvements: @@ -21,7 +21,7 @@ VERSION 2.3.0 * More robust handling of corrupt shapefiles (fixes #235) * Fix errors when writing to individual file-handles (fixes #237) * Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) - * Fix test issues in environments without network access (@sebastic, @musicinmybrain). + * Fix test issues in environments without network access (@sebastic, @musicinmybrain). VERSION 2.2.0 @@ -33,7 +33,7 @@ VERSION 2.2.0 * Allow fast filtering which shapes to read from the file through a `bbox` arg. Improvements: - * More examples and restructuring of README. + * More examples and restructuring of README. * More informative Shape to geojson warnings (see #219). * Add shapefile.VERBOSE flag to control warnings verbosity (default True). * Shape object information when calling repr(). @@ -88,7 +88,7 @@ VERSION 2.1.0 2019-02-15 New Features: - * Added back read/write support for unicode field names. + * Added back read/write support for unicode field names. * Improved Record representation * More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() @@ -100,54 +100,54 @@ VERSION 2.1.0 VERSION 2.0.1 2018-11-05 - * Fix pip install setup.py README decoding error. + * Fix pip install setup.py README decoding error. VERSION 2.0.0 2018-09-01 (Note: Some contributor attributions may be missing.) New Features: - * Full support for unicode text, with custom encoding, and exception handling. - - Means that the Reader returns unicode, and the Writer accepts unicode. - * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. + * Full support for unicode text, with custom encoding, and exception handling. + - Means that the Reader returns unicode, and the Writer accepts unicode. + * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. * Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. + - Specify filepath/destination and text encoding when creating the Writer. + - The file is written incrementally with each call to shape/record. + - Adding shapes is now done using dedicated methods for each shapetype. * Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. 
- - Shapefiles can be iterated, have a length, and supports the geo interface. + - Shapefiles can be opened using the context manager, and files are properly closed. + - Shapefiles can be iterated, have a length, and supports the geo interface. - New ways of inspecing shapefile metadata by printing. [@megies] - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] - * Add more support and documentation for MultiPatch 3D shapes. - * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. - * Better documentation of previously unclear aspects, such as field types. + - More convenient shape type name checking. [@megies] + * Add more support and documentation for MultiPatch 3D shapes. + * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. + * Better documentation of previously unclear aspects, such as field types. Bug Fixes: * More reliable/robust: - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. [@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. + - Improved parsing of field value types, fixed errors and made more flexible. - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] * Fix some geo interface errors, including checking polygon directions. * Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] * Enforce maximum field limit. [@mwtoews] - + VERSION 1.2.12 * ? - + VERSION 1.2.11 2017-04-29 Karim Bahgat - * Fixed bugs when reading and writing empty shapefiles. + * Fixed bugs when reading and writing empty shapefiles. * Fixed bug when writing null geometry. * Fixed misc data type errors. * Fixed error when reading files with wrong record length. - * Use max field precision when saving decimal numbers. - * Improved shapetype detection. - * Expanded docs on data types. - * General doc additions and travis icon. + * Use max field precision when saving decimal numbers. + * Improved shapetype detection. + * Expanded docs on data types. + * General doc additions and travis icon. VERSION 1.2.10 @@ -162,7 +162,7 @@ VERSION 1.2.9 VERSION 1.2.8 2016-08-17 Joel Lawhead - * Configured Travis-CI + * Configured Travis-CI VERSION 1.2.5 @@ -211,9 +211,9 @@ VERSION 1.2.0 *README.txt add example/test for writing a 3D polygon VERSION 1.1.9 - + 2013-07-27 Joel Lawhead - *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer + *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer when referencing "z" and "m" values. This bug caused errors only when editing 3D shapefiles. @@ -233,39 +233,39 @@ VERSION 1.1.7 2013-06-22 Joel Lawhead - *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention + *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention to export shapefiles as GeoJSON. - - *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed + + *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed as unicode strings. 
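The __geo_interface__ convention from the 1.1.7 entry above can be exercised like this (a sketch; the path is reused from the tests in this series):

    >>> import shapefile
    >>> with shapefile.Reader("shapefiles/blockgroups") as sf:
    ...     geoj = sf.shape(0).__geo_interface__   # a GeoJSON-like mapping
    ...     geom_type = geoj['type']               # e.g. 'Polygon' or 'MultiPolygon'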
- - *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through + + *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through geometry records for parsing large files efficiently. - - *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through + + *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through dbf records efficiently in large files. - - *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx + + *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx file is not available. - + *shapefile.py (main) Added __version__ attribute. - - *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to + + *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to dbf fields. *shapefile.py (Reader.__shape) Updated to calculate and seek the start of the next record. The shapefile spec does not require the content of a geometry record to be as long as the content - length defined in the header. The result is you can delete features without modifying the + length defined in the header. The result is you can delete features without modifying the record header allowing for empty space in records. - + *shapefile.py (Writer.poly) Added enforcement of closed polygons - + *shapefile.py (Writer.save) Added unique file name generator to use if no file names are passed to a writer instance when saving (ex. w.save()). The unique file name is returned as a string. - + *README.txt (main) Added tests for iterShapes(), iterRecords(), __geo_interface__() - + *README.txt (main) Updated "bbox" property documentation to match Esri specification. - - + + From db89f75b6ed42ef0062f479e5d30663cbe135276 Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sat, 7 Sep 2024 15:05:51 +0000 Subject: [PATCH 034/115] build.yml: Change python-versions to leave off patch version. This makes sure that the build uses the latest patch version for each python major.minor. Keep the pre-release for 3.13 explicit as I don't know how that will work before the first 3.13 release is out. Also make each version number be on a separate line so that adding and removing python versions is easier to see in diffs, e.g., like this: https://github.com/google/earthengine-api/blob/e44bb391af90c9b7dfde79d95b55116e79d0d6bf/.github/workflows/ci-tests.yml#L17 --- .github/workflows/build.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b1b885cf..74c22de6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,18 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["2.7.18", "3.5.10", "3.6.15", "3.7.17", "3.8.18", "3.9.18", "3.10.13", "3.11.7", "3.12.1", "3.13.0a2"] + python-version: [ + "2.7", + "3.5", + "3.6", + "3.7", + "3.8", + "3.9", + "3.10", + "3.11", + "3.12", + "3.13.0a2", + ] runs-on: ubuntu-latest container: From 4c92dbf637898b5b2bb8a0a6ff270d4ffe24e4cb Mon Sep 17 00:00:00 2001 From: Kurt Schwehr Date: Sat, 7 Sep 2024 15:11:59 +0000 Subject: [PATCH 035/115] README.md and changelog.txt: codespell --- README.md | 2 +- changelog.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c3b1bb9b..94861bb2 100644 --- a/README.md +++ b/README.md @@ -876,7 +876,7 @@ None is interpreted as missing. 
>>> w.record(False) >>> w.record(0) >>> w.record(None) - >>> w.record("Nonesense") + >>> w.record("Nonsense") >>> w.close() >>> r = shapefile.Reader('shapefiles/test/dtype') diff --git a/changelog.txt b/changelog.txt index 291a2e81..80b88e26 100644 --- a/changelog.txt +++ b/changelog.txt @@ -171,7 +171,7 @@ VERSION 1.2.5 * Merge README text into markdown file. Remove text version. * Fixed parsing of number of points for some shapes (MULTIPOINTM, MULTIPOINTZ) -VERSON 1.2.3 +VERSION 1.2.3 2015-06-21 Joel Lawhead *shapefile.py (u) Bugfix for Python3 with Reader.iterShapeRecords() From e0780cab0bbd42ac0207acb48a241a311d18a318 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 12:05:57 +0100 Subject: [PATCH 036/115] Bump actions/checkout to v4 (to suppress old Node version warning) --- .github/workflows/build.yml | 2 +- .github/workflows/deploy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 74c22de6..dfeae580 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,7 +33,7 @@ jobs: image: python:${{ matrix.python-version }}-slim steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9299c686..70db5f72 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v2 with: From 9582858333b88ad93bccfac7fab7e0da0e136776 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 12:28:23 +0100 Subject: [PATCH 037/115] Delete dormant commented code blocks They live on in the commit history, if required. --- shapefile.py | 113 --------------------------------------------------- 1 file changed, 113 deletions(-) diff --git a/shapefile.py b/shapefile.py index 26d93dc6..beab2362 100644 --- a/shapefile.py +++ b/shapefile.py @@ -874,42 +874,6 @@ class ShapefileException(Exception): """An exception to handle shapefile specific problems.""" pass -# def warn_geojson_collection(shapes): -# # collect information about any potential errors with the GeoJSON -# errors = {} -# for i,shape in enumerate(shapes): -# shape_errors = shape._errors -# if shape_errors: -# for error in shape_errors.keys(): -# errors[error] = errors[error] + [i] if error in errors else [] - -# # warn if any errors were found -# if errors: -# messages = ['Summary of possibles issues encountered during shapefile to GeoJSON conversion:'] - -# # polygon orphan holes -# orphans = errors.get('polygon_orphaned_holes', None) -# if orphans: -# msg = 'GeoJSON format requires that all interior holes be contained by an exterior ring, \ -# but the Shapefile contained {} records of polygons where some of its interior holes were \ -# orphaned (not contained by any other rings). The rings were still included but were \ -# encoded as GeoJSON exterior rings instead of holes. 
Shape ids: {}'.format(len(orphans), orphans) -# messages.append(msg) - -# # polygon only holes/wrong orientation -# only_holes = errors.get('polygon_only_holes', None) -# if only_holes: -# msg = 'GeoJSON format requires that polygons contain at least one exterior ring, but \ -# the Shapefile contained {} records of polygons where all of its component rings were stored as interior \ -# holes. The rings were still included but were encoded as GeoJSON exterior rings instead of holes. \ -# Shape ids: {}'.format(len(only_holes), only_holes) -# messages.append(msg) - -# if len(messages) > 1: -# # more than just the "Summary of..." header -# msg = '\n'.join(messages) -# logger.warning(msg) - class Reader(object): """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, @@ -2556,83 +2520,6 @@ def field(self, name, fieldType="C", size="50", decimal=0): "Shapefile Writer reached maximum number of fields: 2046.") self.fields.append((name, fieldType, size, decimal)) -## def saveShp(self, target): -## """Save an shp file.""" -## if not hasattr(target, "write"): -## target = os.path.splitext(target)[0] + '.shp' -## self.shp = self.__getFileObj(target) -## self.__shapefileHeader(self.shp, headerType='shp') -## self.shp.seek(100) -## self._shp.seek(0) -## chunk = True -## while chunk: -## chunk = self._shp.read(self.bufsize) -## self.shp.write(chunk) -## -## def saveShx(self, target): -## """Save an shx file.""" -## if not hasattr(target, "write"): -## target = os.path.splitext(target)[0] + '.shx' -## self.shx = self.__getFileObj(target) -## self.__shapefileHeader(self.shx, headerType='shx') -## self.shx.seek(100) -## self._shx.seek(0) -## chunk = True -## while chunk: -## chunk = self._shx.read(self.bufsize) -## self.shx.write(chunk) -## -## def saveDbf(self, target): -## """Save a dbf file.""" -## if not hasattr(target, "write"): -## target = os.path.splitext(target)[0] + '.dbf' -## self.dbf = self.__getFileObj(target) -## self.__dbfHeader() # writes to .dbf -## self._dbf.seek(0) -## chunk = True -## while chunk: -## chunk = self._dbf.read(self.bufsize) -## self.dbf.write(chunk) - -## def save(self, target=None, shp=None, shx=None, dbf=None): -## """Save the shapefile data to three files or -## three file-like objects. SHP and DBF files can also -## be written exclusively using saveShp, saveShx, and saveDbf respectively. -## If target is specified but not shp, shx, or dbf then the target path and -## file name are used. If no options or specified, a unique base file name -## is generated to save the files and the base file name is returned as a -## string. 
-## """ -## # Balance if already not balanced -## if shp and dbf: -## if self.autoBalance: -## self.balance() -## if self.recNum != self.shpNum: -## raise ShapefileException("When saving both the dbf and shp file, " -## "the number of records (%s) must correspond " -## "with the number of shapes (%s)" % (self.recNum, self.shpNum)) -## # Save -## if shp: -## self.saveShp(shp) -## if shx: -## self.saveShx(shx) -## if dbf: -## self.saveDbf(dbf) -## # Create a unique file name if one is not defined -## if not shp and not shx and not dbf: -## generated = False -## if not target: -## temp = tempfile.NamedTemporaryFile(prefix="shapefile_",dir=os.getcwd()) -## target = temp.name -## generated = True -## self.saveShp(target) -## self.shp.close() -## self.saveShx(target) -## self.shx.close() -## self.saveDbf(target) -## self.dbf.close() -## if generated: -## return target # Begin Testing def test(**kwargs): From 8a4d14b12161f4a09779a99a426c1c4a620d7255 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:17:36 +0100 Subject: [PATCH 038/115] Create .gitattributes, to enforce crlf line endings consistently --- .gitattributes | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..3dc24e59 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +eol=crlf \ No newline at end of file From ef52ba28572609931069ab3f4cc00d35a5c4fdad Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:30:31 +0100 Subject: [PATCH 039/115] Update .gitattributes --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 3dc24e59..f91ce585 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -eol=crlf \ No newline at end of file +* eol=crlf \ No newline at end of file From b6799f37807d27ae071ca42f60a4e391f4ba8276 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 08:41:18 +0100 Subject: [PATCH 040/115] Use lf instead of crlf to avoid apparent corruption of checked in shapefile .dbfs --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index f91ce585..44b4224b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -* eol=crlf \ No newline at end of file +* eol=lf \ No newline at end of file From 4b22a1a7e5f415b22a3b6a571ae61e4e8e12952c Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 09:58:03 +0100 Subject: [PATCH 041/115] Update .gitattributes --- .gitattributes | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 44b4224b..4e7261fc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ -* eol=lf \ No newline at end of file +* text eol=lf +shapefiles binary From fa2b0a769fd9cb1b64656006b87d82bfb57a87db Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:02:56 +0100 Subject: [PATCH 042/115] Update .gitattributes --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 4e7261fc..7435e016 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,2 @@ * text eol=lf -shapefiles binary +shapefiles/**/* binary From bd0409439b10f9f381fe1e8aaed6d1e26fdb0806 Mon 
Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Sun, 15 Sep 2024 10:08:00 +0100 Subject: [PATCH 043/115] Change line endings to LF --- LICENSE.TXT | 18 +- changelog.txt | 542 +++--- shapefile.py | 5134 ++++++++++++++++++++++++------------------------- 3 files changed, 2847 insertions(+), 2847 deletions(-) diff --git a/LICENSE.TXT b/LICENSE.TXT index b7d72761..ce33f7ba 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -1,9 +1,9 @@ -The MIT License (MIT) - -Copyright © 2013 Joel Lawhead - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +The MIT License (MIT) + +Copyright © 2013 Joel Lawhead + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/changelog.txt b/changelog.txt index 80b88e26..533d704e 100644 --- a/changelog.txt +++ b/changelog.txt @@ -1,271 +1,271 @@ - -VERSION 2.3.1 - -2022-07-28 - Bug fixes: - * Fix recently introduced issue where Reader/Writer closes file-like objects provided by user (#244) - -VERSION 2.3.0 - -2022-04-30 - New Features: - * Added support for pathlib and path-like shapefile filepaths (@mwtoews). - * Allow reading individual file extensions via filepaths. 
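As a sketch of the 2.3.0 entry above on reading individual file extensions (assuming only that the named member file exists):

    >>> import shapefile
    >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf")   # opens just the dbf member
    >>> recs = sf.records()
    >>> sf.close()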
- - Improvements: - * Simplified setup and deployment (@mwtoews) - * Faster shape access when missing shx file - * Switch to named logger (see #240) - - Bug fixes: - * More robust handling of corrupt shapefiles (fixes #235) - * Fix errors when writing to individual file-handles (fixes #237) - * Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) - * Fix test issues in environments without network access (@sebastic, @musicinmybrain). - -VERSION 2.2.0 - -2022-02-02 - New Features: - * Read shapefiles directly from zipfiles. - * Read shapefiles directly from urls. - * Allow fast extraction of only a subset of dbf fields through a `fields` arg. - * Allow fast filtering which shapes to read from the file through a `bbox` arg. - - Improvements: - * More examples and restructuring of README. - * More informative Shape to geojson warnings (see #219). - * Add shapefile.VERBOSE flag to control warnings verbosity (default True). - * Shape object information when calling repr(). - * Faster ring orientation checks, enforce geojson output ring orientation. - - Bug fixes: - * Remove null-padding at end of some record character fields. - * Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. - * Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) - * Fix bug where records and shapes would be assigned incorrect record number (@karanrn) - * Fix typos in docs (@timgates) - -VERSION 2.1.3 - -2021-01-14 - Bug fixes: - * Fix recent bug in geojson hole-in-polygon checking (see #205) - * Misc fixes to allow geo interface dump to json (eg dates as strings) - * Handle additional dbf date null values, and return faulty dates as unicode (see #187) - * Add writer target typecheck - * Fix bugs to allow reading shp/shx/dbf separately - * Allow delayed shapefile loading by passing no args - * Fix error with writing empty z/m shapefile (@mcuprjak) - * Fix signed_area() so ignores z/m coords - * Enforce writing the 11th field name character as null-terminator (only first 10 are used) - * Minor README fixes - * Added more tests - -VERSION 2.1.2 - -2020-09-10 - Bug fixes: - * Fix issue where `warnings.simplefilter('always')` changes global warning behavior [see #203] - -VERSION 2.1.1 - -2020-09-09 - Improvements: - * Handle shapes with no coords and represent as geojson with no coords (GeoJSON null-equivalent) - * Expand testing to Python 3.6, 3.7, 3.8 and PyPy; drop 3.3 and 3.4 [@mwtoews] - * Added pytest testing [@jmoujaes] - - Bug fixes: - * Fix incorrect geo interface handling of multipolygons with complex exterior-hole relations [see #202] - * Enforce shapefile requirement of at least one field, to avoid writing invalid shapefiles [@Jonty] - * Fix Reader geo interface including DeletionFlag field in feature properties [@nnseva] - * Fix polygons not being auto closed, which was accidentally dropped - * Fix error for null geometries in feature geojson - * Misc docstring cleanup [@fiveham] - -VERSION 2.1.0 - -2019-02-15 - New Features: - * Added back read/write support for unicode field names. - * Improved Record representation - * More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() - - Bug fixes: - * Fixed error when reading optional m-values - * Fixed Record attribute autocomplete in Python 3 - * Misc readme cleanup - -VERSION 2.0.1 - -2018-11-05 - * Fix pip install setup.py README decoding error. 
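A sketch of the 2.2.0 reading conveniences listed above; the zip path and field name are hypothetical placeholders:

    >>> import shapefile
    >>> sf = shapefile.Reader("shapefiles.zip")       # read directly from a zipfile
    >>> recs = sf.records(fields=["STATE"])           # parse only a subset of dbf fields
    >>> sf.close()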
- -VERSION 2.0.0 - -2018-09-01 - (Note: Some contributor attributions may be missing.) - New Features: - * Full support for unicode text, with custom encoding, and exception handling. - - Means that the Reader returns unicode, and the Writer accepts unicode. - * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. - * Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. - * Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. - - Shapefiles can be iterated, have a length, and supports the geo interface. - - New ways of inspecing shapefile metadata by printing. [@megies] - - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] - * Add more support and documentation for MultiPatch 3D shapes. - * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. - * Better documentation of previously unclear aspects, such as field types. - - Bug Fixes: - * More reliable/robust: - - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. [@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. - - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] - * Fix some geo interface errors, including checking polygon directions. - * Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] - * Enforce maximum field limit. [@mwtoews] - -VERSION 1.2.12 - * ? - -VERSION 1.2.11 - -2017-04-29 Karim Bahgat - * Fixed bugs when reading and writing empty shapefiles. - * Fixed bug when writing null geometry. - * Fixed misc data type errors. - * Fixed error when reading files with wrong record length. - * Use max field precision when saving decimal numbers. - * Improved shapetype detection. - * Expanded docs on data types. - * General doc additions and travis icon. - -VERSION 1.2.10 - -2016-09-24 Karim Bahgat - * Bump version to fix pip install issue. - -VERSION 1.2.9 - -2016-09-22 Karim Bahgat - * Revert back to fix #66. - -VERSION 1.2.8 - -2016-08-17 Joel Lawhead - * Configured Travis-CI - -VERSION 1.2.5 - -2016-08-16 Joel Lawhead - * Reader speed up through batch unpacking bytes - * Merge README text into markdown file. Remove text version. 
- * Fixed parsing of number of points for some shapes (MULTIPOINTM, MULTIPOINTZ) - -VERSION 1.2.3 - -2015-06-21 Joel Lawhead - *shapefile.py (u) Bugfix for Python3 with Reader.iterShapeRecords() - -VERSION 1.2.2 - -### upcoming (2015/01/09 05:27 +00:00) -- [#11](https://github.com/geospatialpython/pyshp/pull/11) Merge pull request #11 from 7mp/master (@7mp) -- [#1](https://github.com/geospatialpython/pyshp/pull/1) Merge pull request #1 from rgbkrk/patch-1 (@rgbkrk) -- [#13](https://github.com/geospatialpython/pyshp/pull/13) Merge pull request #13 from jzmiller1/patch-1 (@jzmiller1) -- [#16](https://github.com/geospatialpython/pyshp/pull/16) Merge pull request #16 from riggsd/null-friendly (@riggsd) -- [#17](https://github.com/geospatialpython/pyshp/pull/17) Merge pull request #17 from riggsd/no-asserts (@riggsd) -- [#19](https://github.com/geospatialpython/pyshp/pull/19) Merge pull request #19 from razzius/master (@razzius) -- [#20](https://github.com/geospatialpython/pyshp/pull/20) Merge pull request #20 from Brideau/patch-1 (@Brideau) -- [12d69d4](https://github.com/GeospatialPython/pyshp/commit/12d69d47d8c90b445ea22bf5d9530b0c1c710de5) Updated to version 1.2.1 to match PyPI (@GeospatialPython) -- [05b69dc](https://github.com/GeospatialPython/pyshp/commit/05b69dc6b3d58c0dc9a822f6c4b8d45cf8dc9d94) Updated to version 1.2.1 to match PyPI (@GeospatialPython) -- [d2e9f1a](https://github.com/GeospatialPython/pyshp/commit/d2e9f1a41d02cf932484111f45c31781d1f7385a) Typo: recordsIter should be iterRecords (@Brideau) -- [a965aff](https://github.com/GeospatialPython/pyshp/commit/a965aff230aa3f3b85016f7b627609c7e53a2cf9) Format README code sample (@razzius) -- [66e1802](https://github.com/GeospatialPython/pyshp/commit/66e1802013fd3535baa505e15625afaa895ef819) Raise ShapefileException for runtime errors rather than using `assert()` -- [d72723c](https://github.com/GeospatialPython/pyshp/commit/d72723c9e38db8e859b79d95a65c00af1c2ba8ba) Return None when parsing (illegal) NULL attribute values in numeric and date fields, like those produced by QGIS -- [783e68c](https://github.com/GeospatialPython/pyshp/commit/783e68c75b8f20c7656ea470dbc5e9496a8ee0ac) Update link to "XBase File Format Description" (@jzmiller1) -- [79cc409](https://github.com/GeospatialPython/pyshp/commit/79cc409362a24caf4a21923419490ee95d557dc3) Added `Reader.iterShapeRecords` to help work with larger files -- [18c5521](https://github.com/GeospatialPython/pyshp/commit/18c5521b89cd1d7968dff8eb03c1ec37ab4307c5) URL Change (@rgbkrk) -- [202143c](https://github.com/GeospatialPython/pyshp/commit/202143c823407ffea07b5400e77b9ded7169f696) README.md TOC Take 2 -- [2cca75c](https://github.com/GeospatialPython/pyshp/commit/2cca75cd09b27bb19a77ffeb68afc535e3c33802) README.md TOC -- [8b5e994](https://github.com/GeospatialPython/pyshp/commit/8b5e994905fd4a70c0f7ce6d814346e6666b280c) README.md -- [f31a3d7](https://github.com/GeospatialPython/pyshp/commit/f31a3d773dd22e65d3e38ad8b034f186a05b4c4d) Update README.txt (@GeospatialPython) - -VERSION 1.2.1 - -2014-05-11 Joel Lawhead - *shapefile.py (u) fixed bug which failed to properly read some dbf fields in Python 3 - -VERSION 1.2.0 - -2013-09-05 Joel Lawhead - *README.txt add example/test for writing a 3D polygon - -VERSION 1.1.9 - -2013-07-27 Joel Lawhead - *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer - when referencing "z" and "m" values. This bug caused errors only when editing - 3D shapefiles. 
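The custom text encoding support from the 2.0.0 entry above amounts to passing encoding options when opening a file (sketch; 'latin1' and 'replace' are just example values):

    >>> import shapefile
    >>> sf = shapefile.Reader("shapefiles/blockgroups", encoding="latin1", encodingErrors="replace")
    >>> sf.close()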
- -VERSION 1.1.8 - -2013-07-02 Joel Lawhead - *shapefile.py (Writer.poly()) fixed a bug that resulted in incorrect part indexes - *README.txt updated several errors in the documentation. - -2013-06-25 Joel Lawhead - *shapefile.py (Reader.shapes(),Reader.iterShapes()) Updated to verify the file length by - seeking to the end. A user reported shapefiles in the wild which had incorrect .shp file - lengths reported in the header which crashed when reading or iterating shapes. Most - insist on using the .shx file but there's no real reason to do so. - -VERSION 1.1.7 - -2013-06-22 Joel Lawhead - - *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention - to export shapefiles as GeoJSON. - - *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed - as unicode strings. - - *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through - geometry records for parsing large files efficiently. - - *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through - dbf records efficiently in large files. - - *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx - file is not available. - - *shapefile.py (main) Added __version__ attribute. - - *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to - dbf fields. - - *shapefile.py (Reader.__shape) Updated to calculate and seek the start of the next record. The - shapefile spec does not require the content of a geometry record to be as long as the content - length defined in the header. The result is you can delete features without modifying the - record header allowing for empty space in records. - - *shapefile.py (Writer.poly) Added enforcement of closed polygons - - *shapefile.py (Writer.save) Added unique file name generator to use if no file names are passed - to a writer instance when saving (ex. w.save()). The unique file name is returned as a string. - - *README.txt (main) Added tests for iterShapes(), iterRecords(), __geo_interface__() - - *README.txt (main) Updated "bbox" property documentation to match Esri specification. - - - + +VERSION 2.3.1 + +2022-07-28 + Bug fixes: + * Fix recently introduced issue where Reader/Writer closes file-like objects provided by user (#244) + +VERSION 2.3.0 + +2022-04-30 + New Features: + * Added support for pathlib and path-like shapefile filepaths (@mwtoews). + * Allow reading individual file extensions via filepaths. + + Improvements: + * Simplified setup and deployment (@mwtoews) + * Faster shape access when missing shx file + * Switch to named logger (see #240) + + Bug fixes: + * More robust handling of corrupt shapefiles (fixes #235) + * Fix errors when writing to individual file-handles (fixes #237) + * Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) + * Fix test issues in environments without network access (@sebastic, @musicinmybrain). + +VERSION 2.2.0 + +2022-02-02 + New Features: + * Read shapefiles directly from zipfiles. + * Read shapefiles directly from urls. + * Allow fast extraction of only a subset of dbf fields through a `fields` arg. + * Allow fast filtering which shapes to read from the file through a `bbox` arg. + + Improvements: + * More examples and restructuring of README. + * More informative Shape to geojson warnings (see #219). + * Add shapefile.VERBOSE flag to control warnings verbosity (default True). + * Shape object information when calling repr(). 
+ * Faster ring orientation checks, enforce geojson output ring orientation. + + Bug fixes: + * Remove null-padding at end of some record character fields. + * Fix dbf writing error when the number of record list or dict entries didn't match the number of fields. + * Handle rare garbage collection issue after deepcopy (https://github.com/mattijn/topojson/issues/120) + * Fix bug where records and shapes would be assigned incorrect record number (@karanrn) + * Fix typos in docs (@timgates) + +VERSION 2.1.3 + +2021-01-14 + Bug fixes: + * Fix recent bug in geojson hole-in-polygon checking (see #205) + * Misc fixes to allow geo interface dump to json (eg dates as strings) + * Handle additional dbf date null values, and return faulty dates as unicode (see #187) + * Add writer target typecheck + * Fix bugs to allow reading shp/shx/dbf separately + * Allow delayed shapefile loading by passing no args + * Fix error with writing empty z/m shapefile (@mcuprjak) + * Fix signed_area() so ignores z/m coords + * Enforce writing the 11th field name character as null-terminator (only first 10 are used) + * Minor README fixes + * Added more tests + +VERSION 2.1.2 + +2020-09-10 + Bug fixes: + * Fix issue where `warnings.simplefilter('always')` changes global warning behavior [see #203] + +VERSION 2.1.1 + +2020-09-09 + Improvements: + * Handle shapes with no coords and represent as geojson with no coords (GeoJSON null-equivalent) + * Expand testing to Python 3.6, 3.7, 3.8 and PyPy; drop 3.3 and 3.4 [@mwtoews] + * Added pytest testing [@jmoujaes] + + Bug fixes: + * Fix incorrect geo interface handling of multipolygons with complex exterior-hole relations [see #202] + * Enforce shapefile requirement of at least one field, to avoid writing invalid shapefiles [@Jonty] + * Fix Reader geo interface including DeletionFlag field in feature properties [@nnseva] + * Fix polygons not being auto closed, which was accidentally dropped + * Fix error for null geometries in feature geojson + * Misc docstring cleanup [@fiveham] + +VERSION 2.1.0 + +2019-02-15 + New Features: + * Added back read/write support for unicode field names. + * Improved Record representation + * More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() + + Bug fixes: + * Fixed error when reading optional m-values + * Fixed Record attribute autocomplete in Python 3 + * Misc readme cleanup + +VERSION 2.0.1 + +2018-11-05 + * Fix pip install setup.py README decoding error. + +VERSION 2.0.0 + +2018-09-01 + (Note: Some contributor attributions may be missing.) + New Features: + * Full support for unicode text, with custom encoding, and exception handling. + - Means that the Reader returns unicode, and the Writer accepts unicode. + * PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. + * Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: + - Specify filepath/destination and text encoding when creating the Writer. + - The file is written incrementally with each call to shape/record. + - Adding shapes is now done using dedicated methods for each shapetype. + * Reading shapefiles is now more convenient: + - Shapefiles can be opened using the context manager, and files are properly closed. + - Shapefiles can be iterated, have a length, and supports the geo interface. + - New ways of inspecing shapefile metadata by printing. [@megies] + - More convenient accessing of Record values as attributes. 
[@philippkraft] + - More convenient shape type name checking. [@megies] + * Add more support and documentation for MultiPatch 3D shapes. + * The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. + * Better documentation of previously unclear aspects, such as field types. + + Bug Fixes: + * More reliable/robust: + - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] + - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. [@ShinNoNoir] + - Improved parsing of field value types, fixed errors and made more flexible. + - Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] + * Fix some geo interface errors, including checking polygon directions. + * Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] + * Enforce maximum field limit. [@mwtoews] + +VERSION 1.2.12 + * ? + +VERSION 1.2.11 + +2017-04-29 Karim Bahgat + * Fixed bugs when reading and writing empty shapefiles. + * Fixed bug when writing null geometry. + * Fixed misc data type errors. + * Fixed error when reading files with wrong record length. + * Use max field precision when saving decimal numbers. + * Improved shapetype detection. + * Expanded docs on data types. + * General doc additions and travis icon. + +VERSION 1.2.10 + +2016-09-24 Karim Bahgat + * Bump version to fix pip install issue. + +VERSION 1.2.9 + +2016-09-22 Karim Bahgat + * Revert back to fix #66. + +VERSION 1.2.8 + +2016-08-17 Joel Lawhead + * Configured Travis-CI + +VERSION 1.2.5 + +2016-08-16 Joel Lawhead + * Reader speed up through batch unpacking bytes + * Merge README text into markdown file. Remove text version. 
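The attribute-style record access from the 2.0.0 entry above, sketched ('AREA' is a hypothetical field name):

    >>> import shapefile
    >>> with shapefile.Reader("shapefiles/blockgroups") as sf:
    ...     rec = sf.record(0)
    ...     by_key = rec['AREA']   # lookup by field name
    ...     by_attr = rec.AREA     # equivalent attribute access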
+ * Fixed parsing of number of points for some shapes (MULTIPOINTM, MULTIPOINTZ) + +VERSION 1.2.3 + +2015-06-21 Joel Lawhead + *shapefile.py (u) Bugfix for Python3 with Reader.iterShapeRecords() + +VERSION 1.2.2 + +### upcoming (2015/01/09 05:27 +00:00) +- [#11](https://github.com/geospatialpython/pyshp/pull/11) Merge pull request #11 from 7mp/master (@7mp) +- [#1](https://github.com/geospatialpython/pyshp/pull/1) Merge pull request #1 from rgbkrk/patch-1 (@rgbkrk) +- [#13](https://github.com/geospatialpython/pyshp/pull/13) Merge pull request #13 from jzmiller1/patch-1 (@jzmiller1) +- [#16](https://github.com/geospatialpython/pyshp/pull/16) Merge pull request #16 from riggsd/null-friendly (@riggsd) +- [#17](https://github.com/geospatialpython/pyshp/pull/17) Merge pull request #17 from riggsd/no-asserts (@riggsd) +- [#19](https://github.com/geospatialpython/pyshp/pull/19) Merge pull request #19 from razzius/master (@razzius) +- [#20](https://github.com/geospatialpython/pyshp/pull/20) Merge pull request #20 from Brideau/patch-1 (@Brideau) +- [12d69d4](https://github.com/GeospatialPython/pyshp/commit/12d69d47d8c90b445ea22bf5d9530b0c1c710de5) Updated to version 1.2.1 to match PyPI (@GeospatialPython) +- [05b69dc](https://github.com/GeospatialPython/pyshp/commit/05b69dc6b3d58c0dc9a822f6c4b8d45cf8dc9d94) Updated to version 1.2.1 to match PyPI (@GeospatialPython) +- [d2e9f1a](https://github.com/GeospatialPython/pyshp/commit/d2e9f1a41d02cf932484111f45c31781d1f7385a) Typo: recordsIter should be iterRecords (@Brideau) +- [a965aff](https://github.com/GeospatialPython/pyshp/commit/a965aff230aa3f3b85016f7b627609c7e53a2cf9) Format README code sample (@razzius) +- [66e1802](https://github.com/GeospatialPython/pyshp/commit/66e1802013fd3535baa505e15625afaa895ef819) Raise ShapefileException for runtime errors rather than using `assert()` +- [d72723c](https://github.com/GeospatialPython/pyshp/commit/d72723c9e38db8e859b79d95a65c00af1c2ba8ba) Return None when parsing (illegal) NULL attribute values in numeric and date fields, like those produced by QGIS +- [783e68c](https://github.com/GeospatialPython/pyshp/commit/783e68c75b8f20c7656ea470dbc5e9496a8ee0ac) Update link to "XBase File Format Description" (@jzmiller1) +- [79cc409](https://github.com/GeospatialPython/pyshp/commit/79cc409362a24caf4a21923419490ee95d557dc3) Added `Reader.iterShapeRecords` to help work with larger files +- [18c5521](https://github.com/GeospatialPython/pyshp/commit/18c5521b89cd1d7968dff8eb03c1ec37ab4307c5) URL Change (@rgbkrk) +- [202143c](https://github.com/GeospatialPython/pyshp/commit/202143c823407ffea07b5400e77b9ded7169f696) README.md TOC Take 2 +- [2cca75c](https://github.com/GeospatialPython/pyshp/commit/2cca75cd09b27bb19a77ffeb68afc535e3c33802) README.md TOC +- [8b5e994](https://github.com/GeospatialPython/pyshp/commit/8b5e994905fd4a70c0f7ce6d814346e6666b280c) README.md +- [f31a3d7](https://github.com/GeospatialPython/pyshp/commit/f31a3d773dd22e65d3e38ad8b034f186a05b4c4d) Update README.txt (@GeospatialPython) + +VERSION 1.2.1 + +2014-05-11 Joel Lawhead + *shapefile.py (u) fixed bug which failed to properly read some dbf fields in Python 3 + +VERSION 1.2.0 + +2013-09-05 Joel Lawhead + *README.txt add example/test for writing a 3D polygon + +VERSION 1.1.9 + +2013-07-27 Joel Lawhead + *shapefile.py (Writer.__shpRecords) fixed inconsistency between Reader and Writer + when referencing "z" and "m" values. This bug caused errors only when editing + 3D shapefiles. 
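The 3D polygon writing that the 1.2.0 entry above added an example for looks roughly like this under the current 2.x Writer API (path and coordinates are hypothetical):

    >>> import shapefile
    >>> w = shapefile.Writer("shapefiles/test/poly3d", shapeType=shapefile.POLYGONZ)
    >>> w.field('name', 'C')
    >>> # one closed ring of (x, y, z) tuples
    >>> w.polyz([[(0, 0, 0), (0, 5, 3), (5, 5, 5), (5, 0, 3), (0, 0, 0)]])
    >>> w.record('demo')
    >>> w.close()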
+ +VERSION 1.1.8 + +2013-07-02 Joel Lawhead + *shapefile.py (Writer.poly()) fixed a bug that resulted in incorrect part indexes + *README.txt updated several errors in the documentation. + +2013-06-25 Joel Lawhead + *shapefile.py (Reader.shapes(),Reader.iterShapes()) Updated to verify the file length by + seeking to the end. A user reported shapefiles in the wild which had incorrect .shp file + lengths reported in the header which crashed when reading or iterating shapes. Most + insist on using the .shx file but there's no real reason to do so. + +VERSION 1.1.7 + +2013-06-22 Joel Lawhead + + *shapefile.py (_Shape.__geo_interface__) Added Python __geo_interface__ convention + to export shapefiles as GeoJSON. + + *shapefile.py (Reader.__init__) Used is_string() method to detect filenames passed + as unicode strings. + + *shapefile.py (Reader.iterShapes) Added iterShapes() method to iterate through + geometry records for parsing large files efficiently. + + *shapefile.py (Reader.iterRecords) Added iterRecords() method to iterate through + dbf records efficiently in large files. + + *shapefile.py (Reader.shape) Modified shape() method to use iterShapes() if shx + file is not available. + + *shapefile.py (main) Added __version__ attribute. + + *shapefile.py (Writer.record) Fixed bug which prevents writing the number 0 to + dbf fields. + + *shapefile.py (Reader.__shape) Updated to calculate and seek the start of the next record. The + shapefile spec does not require the content of a geometry record to be as long as the content + length defined in the header. The result is you can delete features without modifying the + record header allowing for empty space in records. + + *shapefile.py (Writer.poly) Added enforcement of closed polygons + + *shapefile.py (Writer.save) Added unique file name generator to use if no file names are passed + to a writer instance when saving (ex. w.save()). The unique file name is returned as a string. + + *README.txt (main) Added tests for iterShapes(), iterRecords(), __geo_interface__() + + *README.txt (main) Updated "bbox" property documentation to match Esri specification. + + + diff --git a/shapefile.py b/shapefile.py index beab2362..7d018aef 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1,2567 +1,2567 @@ -""" -shapefile.py -Provides read and write support for ESRI Shapefiles. 
-authors: jlawheadgeospatialpython.com -maintainer: karim.bahgat.norwaygmail.com -Compatible with Python versions 2.7-3.x -""" - -__version__ = "2.3.1" - -from struct import pack, unpack, calcsize, error, Struct -import os -import sys -import time -import array -import tempfile -import logging -import io -from datetime import date -import zipfile - -# Create named logger -logger = logging.getLogger(__name__) - - -# Module settings -VERBOSE = True - -# Constants for shape types -NULL = 0 -POINT = 1 -POLYLINE = 3 -POLYGON = 5 -MULTIPOINT = 8 -POINTZ = 11 -POLYLINEZ = 13 -POLYGONZ = 15 -MULTIPOINTZ = 18 -POINTM = 21 -POLYLINEM = 23 -POLYGONM = 25 -MULTIPOINTM = 28 -MULTIPATCH = 31 - -SHAPETYPE_LOOKUP = { - 0: 'NULL', - 1: 'POINT', - 3: 'POLYLINE', - 5: 'POLYGON', - 8: 'MULTIPOINT', - 11: 'POINTZ', - 13: 'POLYLINEZ', - 15: 'POLYGONZ', - 18: 'MULTIPOINTZ', - 21: 'POINTM', - 23: 'POLYLINEM', - 25: 'POLYGONM', - 28: 'MULTIPOINTM', - 31: 'MULTIPATCH'} - -TRIANGLE_STRIP = 0 -TRIANGLE_FAN = 1 -OUTER_RING = 2 -INNER_RING = 3 -FIRST_RING = 4 -RING = 5 - -PARTTYPE_LOOKUP = { - 0: 'TRIANGLE_STRIP', - 1: 'TRIANGLE_FAN', - 2: 'OUTER_RING', - 3: 'INNER_RING', - 4: 'FIRST_RING', - 5: 'RING'} - - -# Python 2-3 handling - -PYTHON3 = sys.version_info[0] == 3 - -if PYTHON3: - xrange = range - izip = zip - - from urllib.parse import urlparse, urlunparse - from urllib.error import HTTPError - from urllib.request import urlopen, Request - -else: - from itertools import izip - - from urlparse import urlparse, urlunparse - from urllib2 import HTTPError - from urllib2 import urlopen, Request - - -# Helpers - -MISSING = [None,''] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. - -if PYTHON3: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, str): - # For python 3 encode str to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return b"" - else: - # Force string representation. - return str(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 3 decode bytes to str. - return v.decode(encoding, encodingErrors) - elif isinstance(v, str): - # Already str. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, str) - -else: - def b(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, unicode): - # For python 2 encode unicode to bytes. - return v.encode(encoding, encodingErrors) - elif isinstance(v, bytes): - # Already bytes. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return "" - else: - # Force string representation. - return unicode(v).encode(encoding, encodingErrors) - - def u(v, encoding='utf-8', encodingErrors='strict'): - if isinstance(v, bytes): - # For python 2 decode bytes to unicode. - return v.decode(encoding, encodingErrors) - elif isinstance(v, unicode): - # Already unicode. - return v - elif v is None: - # Since we're dealing with text, interpret None as "" - return u"" - else: - # Force string representation. 
- return bytes(v).decode(encoding, encodingErrors) - - def is_string(v): - return isinstance(v, basestring) - -if sys.version_info[0:2] >= (3, 6): - def pathlike_obj(path): - if isinstance(path, os.PathLike): - return os.fsdecode(path) - else: - return path -else: - def pathlike_obj(path): - if is_string(path): - return path - elif hasattr(path, "__fspath__"): - return path.__fspath__() - else: - try: - return str(path) - except: - return path - - -# Begin - -class _Array(array.array): - """Converts python tuples to lists of the appropriate type. - Used to unpack different shapefile header parts.""" - def __repr__(self): - return str(self.tolist()) - -def signed_area(coords, fast=False): - """Return the signed area enclosed by a ring using the linear time - algorithm. A value >= 0 indicates a counter-clockwise oriented ring. - A faster version is possible by setting 'fast' to True, which returns - 2x the area, e.g. if you're only interested in the sign of the area. - """ - xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values - xs.append(xs[1]) - ys.append(ys[1]) - area2 = sum(xs[i]*(ys[i+1]-ys[i-1]) for i in range(1, len(coords))) - if fast: - return area2 - else: - return area2 / 2.0 - -def is_cw(coords): - """Returns True if a polygon ring has clockwise orientation, determined - by a negatively signed area. - """ - area2 = signed_area(coords, fast=True) - return area2 < 0 - -def rewind(coords): - """Returns the input coords in reversed order. - """ - return list(reversed(coords)) - -def ring_bbox(coords): - """Calculates and returns the bounding box of a ring. - """ - xs,ys = zip(*coords) - bbox = min(xs),min(ys),max(xs),max(ys) - return bbox - -def bbox_overlap(bbox1, bbox2): - """Tests whether two bounding boxes overlap, returning a boolean - """ - xmin1,ymin1,xmax1,ymax1 = bbox1 - xmin2,ymin2,xmax2,ymax2 = bbox2 - overlap = (xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2) - return overlap - -def bbox_contains(bbox1, bbox2): - """Tests whether bbox1 fully contains bbox2, returning a boolean - """ - xmin1,ymin1,xmax1,ymax1 = bbox1 - xmin2,ymin2,xmax2,ymax2 = bbox2 - contains = (xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2) - return contains - -def ring_contains_point(coords, p): - """Fast point-in-polygon crossings algorithm, MacMartin optimization. - - Adapted from code by Eric Haynes - http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c - - Original description: - Shoot a test ray along +X axis. The strategy, from MacMartin, is to - compare vertex Y values to the testing point's Y and quickly discard - edges which are entirely to one side of the test ray. - """ - tx,ty = p - - # get initial test bit for above/below X axis - vtx0 = coords[0] - yflag0 = ( vtx0[1] >= ty ) - - inside_flag = False - for vtx1 in coords[1:]: - yflag1 = ( vtx1[1] >= ty ) - # check if endpoints straddle (are on opposite sides) of X axis - # (i.e. the Y's differ); if so, +X ray could intersect this edge. - if yflag0 != yflag1: - xflag0 = ( vtx0[0] >= tx ) - # check if endpoints are on same side of the Y axis (i.e. X's - # are the same); if so, it's easy to test if edge hits or misses. - if xflag0 == ( vtx1[0] >= tx ): - # if edge's X values both right of the point, must hit - if xflag0: - inside_flag = not inside_flag - else: - # compute intersection of pgon segment with +X ray, note - # if >= point's X; if so, the ray hits it. 
- if ( vtx1[0] - (vtx1[1]-ty) * ( vtx0[0]-vtx1[0]) / (vtx0[1]-vtx1[1]) ) >= tx: - inside_flag = not inside_flag - - # move to next pair of vertices, retaining info as possible - yflag0 = yflag1 - vtx0 = vtx1 - - return inside_flag - -def ring_sample(coords, ccw=False): - """Return a sample point guaranteed to be within a ring, by efficiently - finding the first centroid of a coordinate triplet whose orientation - matches the orientation of the ring and passes the point-in-ring test. - The orientation of the ring is assumed to be clockwise, unless ccw - (counter-clockwise) is set to True. - """ - triplet = [] - def itercoords(): - # iterate full closed ring - for p in coords: - yield p - # finally, yield the second coordinate to the end to allow checking the last triplet - yield coords[1] - - for p in itercoords(): - # add point to triplet (but not if duplicate) - if p not in triplet: - triplet.append(p) - - # new triplet, try to get sample - if len(triplet) == 3: - # check that triplet does not form a straight line (not a triangle) - is_straight_line = (triplet[0][1] - triplet[1][1]) * (triplet[0][0] - triplet[2][0]) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) - if not is_straight_line: - # get triplet orientation - closed_triplet = triplet + [triplet[0]] - triplet_ccw = not is_cw(closed_triplet) - # check that triplet has the same orientation as the ring (means triangle is inside the ring) - if ccw == triplet_ccw: - # get triplet centroid - xs,ys = zip(*triplet) - xmean,ymean = sum(xs) / 3.0, sum(ys) / 3.0 - # check that triplet centroid is truly inside the ring - if ring_contains_point(coords, (xmean,ymean)): - return xmean,ymean - - # failed to get sample point from this triplet - # remove oldest triplet coord to allow iterating to next triplet - triplet.pop(0) - - else: - raise Exception('Unexpected error: Unable to find a ring sample point.') - -def ring_contains_ring(coords1, coords2): - '''Returns True if all vertexes in coords2 are fully inside coords1. - ''' - return all((ring_contains_point(coords1, p2) for p2 in coords2)) - -def organize_polygon_rings(rings, return_errors=None): - '''Organize a list of coordinate rings into one or more polygons with holes. - Returns a list of polygons, where each polygon is composed of a single exterior - ring, and one or more interior holes. If a return_errors dict is provided (optional), - any errors encountered will be added to it. - - Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). - Rings are determined as exteriors if they run in clockwise direction, or interior - holes if they run in counter-clockwise direction. This method is used to construct - GeoJSON (multi)polygons from the shapefile polygon shape type, which does not - explicitly store the structure of the polygons beyond exterior/interior ring orientation. 
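The classification that follows rests entirely on the sign convention described here: clockwise rings (negative shoelace sum) are exteriors, counter-clockwise rings are holes. A self-contained version of that orientation test (signed_area2 is an illustrative name):

    def signed_area2(ring):
        # twice the signed area of a closed ring; positive = counter-clockwise
        return sum(x1 * y2 - x2 * y1
                   for (x1, y1), (x2, y2) in zip(ring, ring[1:]))

    ccw_square = [(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)]
    assert signed_area2(ccw_square) > 0           # counter-clockwise: a hole
    assert signed_area2(ccw_square[::-1]) < 0     # clockwise: an exterior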
- ''' - # first iterate rings and classify as exterior or hole - exteriors = [] - holes = [] - for ring in rings: - # shapefile format defines a polygon as a sequence of rings - # where exterior rings are clockwise, and holes counterclockwise - if is_cw(ring): - # ring is exterior - exteriors.append(ring) - else: - # ring is a hole - holes.append(ring) - - # if only one exterior, then all holes belong to that exterior - if len(exteriors) == 1: - # exit early - poly = [exteriors[0]] + holes - polys = [poly] - return polys - - # multiple exteriors, ie multi-polygon, have to group holes with correct exterior - # shapefile format does not specify which holes belong to which exteriors - # so have to do efficient multi-stage checking of hole-to-exterior containment - elif len(exteriors) > 1: - # exit early if no holes - if not holes: - polys = [] - for ext in exteriors: - poly = [ext] - polys.append(poly) - return polys - - # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = dict([(hole_i,[]) for hole_i in xrange(len(holes))]) - exterior_bboxes = [ring_bbox(ring) for ring in exteriors] - for hole_i in hole_exteriors.keys(): - hole_bbox = ring_bbox(holes[hole_i]) - for ext_i,ext_bbox in enumerate(exterior_bboxes): - if bbox_contains(ext_bbox, hole_bbox): - hole_exteriors[hole_i].append( ext_i ) - - # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test - for hole_i,exterior_candidates in hole_exteriors.items(): - - if len(exterior_candidates) > 1: - # get hole sample point - ccw = not is_cw(holes[hole_i]) - hole_sample = ring_sample(holes[hole_i], ccw=ccw) - # collect new exterior candidates - new_exterior_candidates = [] - for ext_i in exterior_candidates: - # check that hole sample point is inside exterior - hole_in_exterior = ring_contains_point(exteriors[ext_i], hole_sample) - if hole_in_exterior: - new_exterior_candidates.append(ext_i) - - # set new exterior candidates - hole_exteriors[hole_i] = new_exterior_candidates - - # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole - for hole_i,exterior_candidates in hole_exteriors.items(): - - if len(exterior_candidates) > 1: - # exterior candidate with the smallest area is the hole's most immediate parent - ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x], fast=True)))[0] - hole_exteriors[hole_i] = [ext_i] - - # separate out holes that are orphaned (not contained by any exterior) - orphan_holes = [] - for hole_i,exterior_candidates in list(hole_exteriors.items()): - if not exterior_candidates: - orphan_holes.append( hole_i ) - del hole_exteriors[hole_i] - continue - - # each hole should now only belong to one exterior, group into exterior-holes polygons - polys = [] - for ext_i,ext in enumerate(exteriors): - poly = [ext] - # find relevant holes - poly_holes = [] - for hole_i,exterior_candidates in list(hole_exteriors.items()): - # hole is relevant if previously matched with this exterior - if exterior_candidates[0] == ext_i: - poly_holes.append( holes[hole_i] ) - poly += poly_holes - polys.append(poly) - - # add orphan holes as exteriors - for hole_i in orphan_holes: - ext = holes[hole_i] - # add as single exterior without any holes - poly = [ext] - polys.append(poly) - - if orphan_holes and return_errors is not None: - return_errors['polygon_orphaned_holes'] = len(orphan_holes) - - return polys - - # no exteriors, be nice and assume due to incorrect 
winding order - else: - if return_errors is not None: - return_errors['polygon_only_holes'] = len(holes) - exteriors = holes - # add as single exterior without any holes - polys = [[ext] for ext in exteriors] - return polys - -class Shape(object): - def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None): - """Stores the geometry of the different shape types - specified in the Shapefile spec. Shape types are - usually point, polyline, or polygons. Every shape type - except the "Null" type contains points at some level for - example vertices in a polygon. If a shape type has - multiple shapes containing points within a single - geometry record then those shapes are called parts. Parts - are designated by their starting index in geometry record's - list of shapes. For MultiPatch geometry, partTypes designates - the patch type of each of the parts. - """ - self.shapeType = shapeType - self.points = points or [] - self.parts = parts or [] - if partTypes: - self.partTypes = partTypes - - # and a dict to silently record any errors encountered - self._errors = {} - - # add oid - if oid is not None: - self.__oid = oid - else: - self.__oid = -1 - - @property - def __geo_interface__(self): - if self.shapeType in [POINT, POINTM, POINTZ]: - # point - if len(self.points) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'Point', 'coordinates':tuple()} - else: - return { - 'type': 'Point', - 'coordinates': tuple(self.points[0]) - } - elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: - if len(self.points) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'MultiPoint', 'coordinates':[]} - else: - # multipoint - return { - 'type': 'MultiPoint', - 'coordinates': [tuple(p) for p in self.points] - } - elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'LineString', 'coordinates':[]} - elif len(self.parts) == 1: - # linestring - return { - 'type': 'LineString', - 'coordinates': [tuple(p) for p in self.points] - } - else: - # multilinestring - ps = None - coordinates = [] - for part in self.parts: - if ps == None: - ps = part - continue - else: - coordinates.append([tuple(p) for p in self.points[ps:part]]) - ps = part - else: - coordinates.append([tuple(p) for p in self.points[part:]]) - return { - 'type': 'MultiLineString', - 'coordinates': coordinates - } - elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: - if len(self.parts) == 0: - # the shape has no coordinate information, i.e. 
is 'empty' - # the geojson spec does not define a proper null-geometry type - # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'Polygon', 'coordinates':[]} - else: - # get all polygon rings - rings = [] - for i in xrange(len(self.parts)): - # get indexes of start and end points of the ring - start = self.parts[i] - try: - end = self.parts[i+1] - except IndexError: - end = len(self.points) - - # extract the points that make up the ring - ring = [tuple(p) for p in self.points[start:end]] - rings.append(ring) - - # organize rings into list of polygons, where each polygon is defined as list of rings. - # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). - polys = organize_polygon_rings(rings, self._errors) - - # if VERBOSE is True, issue detailed warning about any shape errors - # encountered during the Shapefile to GeoJSON conversion - if VERBOSE and self._errors: - header = 'Possible issue encountered when converting Shape #{} to GeoJSON: '.format(self.oid) - orphans = self._errors.get('polygon_orphaned_holes', None) - if orphans: - msg = header + 'Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ -but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ -orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ -encoded as GeoJSON exterior rings instead of holes.' - logger.warning(msg) - only_holes = self._errors.get('polygon_only_holes', None) - if only_holes: - msg = header + 'Shapefile format requires that polygons contain at least one exterior ring, \ -but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ -still included but were encoded as GeoJSON exterior rings instead of holes.' - logger.warning(msg) - - # return as geojson - if len(polys) == 1: - return { - 'type': 'Polygon', - 'coordinates': polys[0] - } - else: - return { - 'type': 'MultiPolygon', - 'coordinates': polys - } - - else: - raise Exception('Shape type "%s" cannot be represented as GeoJSON.' % SHAPETYPE_LOOKUP[self.shapeType]) - - @staticmethod - def _from_geojson(geoj): - # create empty shape - shape = Shape() - # set shapeType - geojType = geoj["type"] if geoj else "Null" - if geojType == "Null": - shapeType = NULL - elif geojType == "Point": - shapeType = POINT - elif geojType == "LineString": - shapeType = POLYLINE - elif geojType == "Polygon": - shapeType = POLYGON - elif geojType == "MultiPoint": - shapeType = MULTIPOINT - elif geojType == "MultiLineString": - shapeType = POLYLINE - elif geojType == "MultiPolygon": - shapeType = POLYGON - else: - raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) - shape.shapeType = shapeType - - # set points and parts - if geojType == "Point": - shape.points = [ geoj["coordinates"] ] - shape.parts = [0] - elif geojType in ("MultiPoint","LineString"): - shape.points = geoj["coordinates"] - shape.parts = [0] - elif geojType in ("Polygon"): - points = [] - parts = [] - index = 0 - for i,ext_or_hole in enumerate(geoj["coordinates"]): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. 
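In other words, the branch that follows rewinds any ring whose winding disagrees with the shapefile convention of clockwise exteriors and counter-clockwise holes. Condensed into a hypothetical helper (to_shapefile_orientation is not part of the patch):

    def to_shapefile_orientation(rings):
        # ring 0 is the exterior and must wind clockwise;
        # later rings are holes and must wind counter-clockwise
        def area2(ring):
            return sum(x1 * y2 - x2 * y1
                       for (x1, y1), (x2, y2) in zip(ring, ring[1:]))
        fixed = []
        for i, ring in enumerate(rings):
            clockwise = area2(ring) < 0
            if (i == 0 and not clockwise) or (i > 0 and clockwise):
                ring = list(reversed(ring))
            fixed.append(ring)
        return fixed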
- if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - elif geojType in ("MultiLineString"): - points = [] - parts = [] - index = 0 - for linestring in geoj["coordinates"]: - points.extend(linestring) - parts.append(index) - index += len(linestring) - shape.points = points - shape.parts = parts - elif geojType in ("MultiPolygon"): - points = [] - parts = [] - index = 0 - for polygon in geoj["coordinates"]: - for i,ext_or_hole in enumerate(polygon): - # although the latest GeoJSON spec states that exterior rings should have - # counter-clockwise orientation, we explicitly check orientation since older - # GeoJSONs might not enforce this. - if i == 0 and not is_cw(ext_or_hole): - # flip exterior direction - ext_or_hole = rewind(ext_or_hole) - elif i > 0 and is_cw(ext_or_hole): - # flip hole direction - ext_or_hole = rewind(ext_or_hole) - points.extend(ext_or_hole) - parts.append(index) - index += len(ext_or_hole) - shape.points = points - shape.parts = parts - return shape - - @property - def oid(self): - """The index position of the shape in the original shapefile""" - return self.__oid - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def __repr__(self): - return 'Shape #{}: {}'.format(self.__oid, self.shapeTypeName) - -class _Record(list): - """ - A class to hold a record. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, the values of the record - can also be retrieved using the field's name. For example if the dbf contains - a field ID at position 0, the ID can be retrieved with the position, the field name - as a key, or the field name as an attribute. - - >>> # Create a Record with one field, normally the record is created by the Reader class - >>> r = _Record({'ID': 0}, [0]) - >>> print(r[0]) - >>> print(r['ID']) - >>> print(r.ID) - """ - - def __init__(self, field_positions, values, oid=None): - """ - A Record should be created by the Reader class - - :param field_positions: A dict mapping field names to field positions - :param values: A sequence of values - :param oid: The object id, an int (optional) - """ - self.__field_positions = field_positions - if oid is not None: - self.__oid = oid - else: - self.__oid = -1 - list.__init__(self, values) - - def __getattr__(self, item): - """ - __getattr__ is called if an attribute is used that does - not exist in the normal sense. 
For example r=Record(...), r.ID - calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5) - :param item: The field name, used as attribute - :return: Value of the field - :raises: AttributeError, if item is not a field of the shapefile - and IndexError, if the field exists but the field's - corresponding value in the Record does not exist - """ - try: - if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() - raise AttributeError('_Record does not implement __setstate__') - index = self.__field_positions[item] - return list.__getitem__(self, index) - except KeyError: - raise AttributeError('{} is not a field name'.format(item)) - except IndexError: - raise IndexError('{} found as a field but not enough values available.'.format(item)) - - def __setattr__(self, key, value): - """ - Sets a value of a field attribute - :param key: The field name - :param value: the value of that field - :return: None - :raises: AttributeError, if key is not a field of the shapefile - """ - if key.startswith('_'): # Prevent infinite loop when setting mangled attribute - return list.__setattr__(self, key, value) - try: - index = self.__field_positions[key] - return list.__setitem__(self, index, value) - except KeyError: - raise AttributeError('{} is not a field name'.format(key)) - - def __getitem__(self, item): - """ - Extends the normal list item access with - access using a fieldname - - For example r['ID'], r[0] - :param item: Either the position of the value or the name of a field - :return: the value of the field - """ - try: - return list.__getitem__(self, item) - except TypeError: - try: - index = self.__field_positions[item] - except KeyError: - index = None - if index is not None: - return list.__getitem__(self, index) - else: - raise IndexError('"{}" is not a field name and not an int'.format(item)) - - def __setitem__(self, key, value): - """ - Extends the normal list item access with - access using a fieldname - - For example r['ID']=2, r[0]=2 - :param key: Either the position of the value or the name of a field - :param value: the new value of the field - """ - try: - return list.__setitem__(self, key, value) - except TypeError: - index = self.__field_positions.get(key) - if index is not None: - return list.__setitem__(self, index, value) - else: - raise IndexError('{} is not a field name and not an int'.format(key)) - - @property - def oid(self): - """The index position of the record in the original shapefile""" - return self.__oid - - def as_dict(self, date_strings=False): - """ - Returns this Record as a dictionary using the field names as keys - :return: dict - """ - dct = dict((f, self[i]) for f, i in self.__field_positions.items()) - if date_strings: - for k,v in dct.items(): - if isinstance(v, date): - dct[k] = '{:04d}{:02d}{:02d}'.format(v.year, v.month, v.day) - return dct - - def __repr__(self): - return 'Record #{}: {}'.format(self.__oid, list(self)) - - def __dir__(self): - """ - Helps to show the field names in an interactive environment like IPython. - See: http://ipython.readthedocs.io/en/stable/config/integrating.html - - :return: List of method names and fields - """ - default = list(dir(type(self))) # default list methods and attributes of this class - fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) - return default + fnames - -class ShapeRecord(object): - """A ShapeRecord object containing a shape along with its attributes. 
- Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" - def __init__(self, shape=None, record=None): - self.shape = shape - self.record = record - - @property - def __geo_interface__(self): - return {'type': 'Feature', - 'properties': self.record.as_dict(date_strings=True), - 'geometry': None if self.shape.shapeType == NULL else self.shape.__geo_interface__} - -class Shapes(list): - """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, this also provides the GeoJSON __geo_interface__ - to return a GeometryCollection dictionary.""" - - def __repr__(self): - return 'Shapes: {}'.format(list(self)) - - @property - def __geo_interface__(self): - # Note: currently this will fail if any of the shapes are null-geometries - # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords - collection = {'type': 'GeometryCollection', - 'geometries': [shape.__geo_interface__ for shape in self]} - return collection - -class ShapeRecords(list): - """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with - former work and to reuse all the optimizations of the builtin list. - In addition to the list interface, this also provides the GeoJSON __geo_interface__ - to return a FeatureCollection dictionary.""" - - def __repr__(self): - return 'ShapeRecords: {}'.format(list(self)) - - @property - def __geo_interface__(self): - collection = {'type': 'FeatureCollection', - 'features': [shaperec.__geo_interface__ for shaperec in self]} - return collection - -class ShapefileException(Exception): - """An exception to handle shapefile specific problems.""" - pass - -class Reader(object): - """Reads the three files of a shapefile as a unit or - separately. If one of the three files (.shp, .shx, - .dbf) is missing no exception is thrown until you try - to call a method that depends on that particular file. - The .shx index file is used if available for efficiency - but is not required to read the geometry from the .shp - file. The "shapefile" argument in the constructor is the - name of the file you want to open, and can be the path - to a shapefile on a local filesystem, inside a zipfile, - or a url. - - You can instantiate a Reader without specifying a shapefile - and then specify one later with the load() method. - - Only the shapefile headers are read upon loading. Content - within each file is only accessed when required and as - efficiently as possible. Shapefiles are usually not large - but they can be. 
- """ - def __init__(self, *args, **kwargs): - self.shp = None - self.shx = None - self.dbf = None - self._files_to_close = [] - self.shapeName = "Not specified" - self._offsets = [] - self.shpLength = None - self.numRecords = None - self.numShapes = None - self.fields = [] - self.__dbfHdrLength = 0 - self.__fieldLookup = {} - self.encoding = kwargs.pop('encoding', 'utf-8') - self.encodingErrors = kwargs.pop('encodingErrors', 'strict') - # See if a shapefile name was passed as the first argument - if len(args) > 0: - path = pathlike_obj(args[0]) - if is_string(path): - - if '.zip' in path: - # Shapefile is inside a zipfile - if path.count('.zip') > 1: - # Multiple nested zipfiles - raise ShapefileException('Reading from multiple nested zipfiles is not supported: %s' % path) - # Split into zipfile and shapefile paths - if path.endswith('.zip'): - zpath = path - shapefile = None - else: - zpath = path[:path.find('.zip')+4] - shapefile = path[path.find('.zip')+4+1:] - # Create a zip file handle - if zpath.startswith('http'): - # Zipfile is from a url - # Download to a temporary url and treat as normal zipfile - req = Request(zpath, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) - resp = urlopen(req) - # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected - zipfileobj = tempfile.NamedTemporaryFile(mode='w+b', suffix='.zip', delete=True) - zipfileobj.write(resp.read()) - zipfileobj.seek(0) - else: - # Zipfile is from a file - zipfileobj = open(zpath, mode='rb') - # Open the zipfile archive - with zipfile.ZipFile(zipfileobj, 'r') as archive: - if not shapefile: - # Only the zipfile path is given - # Inspect zipfile contents to find the full shapefile path - shapefiles = [name - for name in archive.namelist() - if (name.endswith('.SHP') or name.endswith('.shp'))] - # The zipfile must contain exactly one shapefile - if len(shapefiles) == 0: - raise ShapefileException('Zipfile does not contain any shapefiles') - elif len(shapefiles) == 1: - shapefile = shapefiles[0] - else: - raise ShapefileException('Zipfile contains more than one shapefile: %s. Please specify the full \ - path to the shapefile you would like to open.' % shapefiles ) - # Try to extract file-like objects from zipfile - shapefile = os.path.splitext(shapefile)[0] # root shapefile name - for ext in ['SHP','SHX','DBF','shp','shx','dbf']: - try: - member = archive.open(shapefile+'.'+ext) - # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) - fileobj.write(member.read()) - fileobj.seek(0) - setattr(self, ext.lower(), fileobj) - self._files_to_close.append(fileobj) - except: - pass - # Close and delete the temporary zipfile - try: zipfileobj.close() - except: pass - # Try to load shapefile - if (self.shp or self.dbf): - # Load and exit early - self.load() - return - else: - raise ShapefileException("No shp or dbf file found in zipfile: %s" % path) - - elif path.startswith('http'): - # Shapefile is from a url - # Download each file to temporary path and treat as normal shapefile path - urlinfo = urlparse(path) - urlpath = urlinfo[2] - urlpath,_ = os.path.splitext(urlpath) - shapefile = os.path.basename(urlpath) - for ext in ['shp','shx','dbf']: - try: - _urlinfo = list(urlinfo) - _urlinfo[2] = urlpath + '.' 
+ ext - _path = urlunparse(_urlinfo) - req = Request(_path, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) - resp = urlopen(req) - # write url data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) - fileobj.write(resp.read()) - fileobj.seek(0) - setattr(self, ext, fileobj) - self._files_to_close.append(fileobj) - except HTTPError: - pass - if (self.shp or self.dbf): - # Load and exit early - self.load() - return - else: - raise ShapefileException("No shp or dbf file found at url: %s" % path) - - else: - # Local file path to a shapefile - # Load and exit early - self.load(path) - return - - # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) - if "shp" in kwargs.keys(): - if hasattr(kwargs["shp"], "read"): - self.shp = kwargs["shp"] - # Copy if required - try: - self.shp.seek(0) - except (NameError, io.UnsupportedOperation): - self.shp = io.BytesIO(self.shp.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shp"]) - self.load_shp(baseName) - - if "shx" in kwargs.keys(): - if hasattr(kwargs["shx"], "read"): - self.shx = kwargs["shx"] - # Copy if required - try: - self.shx.seek(0) - except (NameError, io.UnsupportedOperation): - self.shx = io.BytesIO(self.shx.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["shx"]) - self.load_shx(baseName) - - if "dbf" in kwargs.keys(): - if hasattr(kwargs["dbf"], "read"): - self.dbf = kwargs["dbf"] - # Copy if required - try: - self.dbf.seek(0) - except (NameError, io.UnsupportedOperation): - self.dbf = io.BytesIO(self.dbf.read()) - else: - (baseName, ext) = os.path.splitext(kwargs["dbf"]) - self.load_dbf(baseName) - - # Load the files - if self.shp or self.dbf: - self.load() - - def __str__(self): - """ - Use some general info on the shapefile as __str__ - """ - info = ['shapefile Reader'] - if self.shp: - info.append(" {} shapes (type '{}')".format( - len(self), SHAPETYPE_LOOKUP[self.shapeType])) - if self.dbf: - info.append(' {} records ({} fields)'.format( - len(self), len(self.fields))) - return '\n'.join(info) - - def __enter__(self): - """ - Enter phase of context manager. - """ - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - """ - Exit phase of context manager, close opened files. - """ - self.close() - - def __len__(self): - """Returns the number of shapes/records in the shapefile.""" - if self.dbf: - # Preferably use dbf record count - if self.numRecords is None: - self.__dbfHeader() - - return self.numRecords - - elif self.shp: - # Otherwise use shape count - if self.shx: - if self.numShapes is None: - self.__shxHeader() - - return self.numShapes - - else: - # Index file not available, iterate all shapes to get total count - if self.numShapes is None: - # Determine length of shp file - shp = self.shp - checkpoint = shp.tell() - shp.seek(0,2) - shpLength = shp.tell() - shp.seek(100) - # Do a fast shape iteration until end of file. 
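The loop that follows hops from record header to record header, which is what makes the count linear in the number of shapes rather than in bytes parsed. The same scan as a standalone function, under the same header layout (count_shapes and its path argument are illustrative):

    import struct

    def count_shapes(shp_path):
        # count .shp records by skipping over each 8-byte record header;
        # record lengths are stored in 16-bit words, hence the doubling
        with open(shp_path, 'rb') as f:
            f.seek(0, 2)
            file_len = f.tell()
            pos = 100                      # skip the 100-byte file header
            f.seek(pos)
            count = 0
            while pos < file_len:
                _rec_num, rec_len = struct.unpack('>2i', f.read(8))
                pos += 8 + 2 * rec_len
                f.seek(pos)
                count += 1
            return count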
- unpack = Struct('>2i').unpack - offsets = [] - pos = shp.tell() - while pos < shpLength: - offsets.append(pos) - # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) - # Jump to next shape position - pos += 8 + (2 * recLength) - shp.seek(pos) - # Set numShapes and offset indices - self.numShapes = len(offsets) - self._offsets = offsets - # Return to previous file position - shp.seek(checkpoint) - - return self.numShapes - - else: - # No file loaded yet, treat as 'empty' shapefile - return 0 - - def __iter__(self): - """Iterates through the shapes/records in the shapefile.""" - for shaperec in self.iterShapeRecords(): - yield shaperec - - @property - def __geo_interface__(self): - shaperecords = self.shapeRecords() - fcollection = shaperecords.__geo_interface__ - fcollection['bbox'] = list(self.bbox) - return fcollection - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def load(self, shapefile=None): - """Opens a shapefile from a filename or file-like - object. Normally this method would be called by the - constructor with the file name as an argument.""" - if shapefile: - (shapeName, ext) = os.path.splitext(shapefile) - self.shapeName = shapeName - self.load_shp(shapeName) - self.load_shx(shapeName) - self.load_dbf(shapeName) - if not (self.shp or self.dbf): - raise ShapefileException("Unable to open %s.dbf or %s.shp." % (shapeName, shapeName)) - if self.shp: - self.__shpHeader() - if self.dbf: - self.__dbfHeader() - if self.shx: - self.__shxHeader() - - def load_shp(self, shapefile_name): - """ - Attempts to load file with .shp extension as both lower and upper case - """ - shp_ext = 'shp' - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") - self._files_to_close.append(self.shp) - except IOError: - try: - self.shp = open("%s.%s" % (shapefile_name, shp_ext.upper()), "rb") - self._files_to_close.append(self.shp) - except IOError: - pass - - def load_shx(self, shapefile_name): - """ - Attempts to load file with .shx extension as both lower and upper case - """ - shx_ext = 'shx' - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") - self._files_to_close.append(self.shx) - except IOError: - try: - self.shx = open("%s.%s" % (shapefile_name, shx_ext.upper()), "rb") - self._files_to_close.append(self.shx) - except IOError: - pass - - def load_dbf(self, shapefile_name): - """ - Attempts to load file with .dbf extension as both lower and upper case - """ - dbf_ext = 'dbf' - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") - self._files_to_close.append(self.dbf) - except IOError: - try: - self.dbf = open("%s.%s" % (shapefile_name, dbf_ext.upper()), "rb") - self._files_to_close.append(self.dbf) - except IOError: - pass - - def __del__(self): - self.close() - - def close(self): - # Close any files that the reader opened (but not those given by user) - for attribute in self._files_to_close: - if hasattr(attribute, 'close'): - try: - attribute.close() - except IOError: - pass - self._files_to_close = [] - - def __getFileObj(self, f): - """Checks to see if the requested shapefile file object is - available. 
If not a ShapefileException is raised."""
-        if not f:
-            raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.")
-        if self.shp and self.shpLength is None:
-            self.load()
-        if self.dbf and len(self.fields) == 0:
-            self.load()
-        return f
-
-    def __restrictIndex(self, i):
-        """Provides list-like handling of a record index with a clearer
-        error message if the index is out of bounds."""
-        if self.numRecords:
-            rmax = self.numRecords - 1
-            if abs(i) > rmax:
-                raise IndexError("Shape or Record index out of range.")
-            if i < 0: i = range(self.numRecords)[i]
-        return i
-
-    def __shpHeader(self):
-        """Reads the header information from a .shp file."""
-        if not self.shp:
-            raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shp file found")
-        shp = self.shp
-        # File length (16-bit word * 2 = bytes)
-        shp.seek(24)
-        self.shpLength = unpack(">i", shp.read(4))[0] * 2
-        # Shape type
-        shp.seek(32)
-        self.shapeType= unpack("<i", shp.read(4))[0]
-        # The shapefile's bounding box (lower left, upper right)
-        self.bbox = _Array('d', unpack("<4d", shp.read(32)))
-        # Elevation
-        self.zbox = _Array('d', unpack("<2d", shp.read(16)))
-        # Measure
-        self.mbox = []
-        for m in _Array('d', unpack("<2d", shp.read(16))):
-            # Measure values less than -10e38 are nodata values according to the spec
-            if m > NODATA:
-                self.mbox.append(m)
-            else:
-                self.mbox.append(None)
-
-    def __shape(self, oid=None, bbox=None):
-        """Returns the header info and geometry for a single shape."""
-        f = self.__getFileObj(self.shp)
-        record = Shape(oid=oid)
-        nParts = nPoints = zmin = zmax = mmin = mmax = None
-        (recNum, recLength) = unpack(">2i", f.read(8))
-        # Determine the start of the next record
-        next = f.tell() + (2 * recLength)
-        shapeType = unpack("<i", f.read(4))[0]
-        record.shapeType = shapeType
-        # For Null shapes create an empty points list for consistency
-        if shapeType == 0:
-            record.points = []
-        # All shape types capable of having a bounding box
-        elif shapeType in (3,5,8,13,15,18,23,25,28,31):
-            record.bbox = _Array('d', unpack("<4d", f.read(32)))
-            # if bbox specified and no overlap, skip this shape
-            if bbox is not None and not bbox_overlap(bbox, record.bbox):
-                # because we stop parsing this shape, skip to beginning of
-                # next shape before we return
-                f.seek(next)
-                return None
-        # Shape types with parts
-        if shapeType in (3,5,13,15,23,25,31):
-            nParts = unpack("<i", f.read(4))[0]
-        # Shape types with points
-        if shapeType in (3,5,8,13,15,18,23,25,28,31):
-            nPoints = unpack("<i", f.read(4))[0]
-        # Read parts
-        if nParts:
-            record.parts = _Array('i', unpack("<%si" % nParts, f.read(nParts * 4)))
-        # Read part types for Multipatch - 31
-        if shapeType == 31:
-            record.partTypes = _Array('i', unpack("<%si" % nParts, f.read(nParts * 4)))
-        # Read points - produces a list of [x,y] values
-        if nPoints:
-            flat = unpack("<%sd" % (2 * nPoints), f.read(16*nPoints))
-            record.points = list(izip(*(iter(flat),) * 2))
-        # Read z extremes and values
-        if shapeType in (13,15,18,31):
-            (zmin, zmax) = unpack("<2d", f.read(16))
-            record.z = _Array('d', unpack("<%sd" % nPoints, f.read(nPoints * 8)))
-        # Read m extremes and values
-        if shapeType in (13,15,18,23,25,28,31):
-            if next - f.tell() >= 16:
-                (mmin, mmax) = unpack("<2d", f.read(16))
-            # Measure values less than -10e38 are nodata values according to the spec
-            if next - f.tell() >= nPoints * 8:
-                record.m = []
-                for m in _Array('d', unpack("<%sd" % nPoints, f.read(nPoints * 8))):
-                    if m > NODATA:
-                        record.m.append(m)
-                    else:
-                        record.m.append(None)
-            else:
-                record.m = [None for _ in range(nPoints)]
-        # Read a single point
-        if shapeType in (1,11,21):
-            record.points = [_Array('d', unpack("<2d", f.read(16)))]
-            if bbox is not None:
-                # create bounding box for Point by duplicating coordinates
-                point_bbox = list(record.points[0] + record.points[0])
-                # skip shape if no overlap with bounding box
-                if not bbox_overlap(bbox, point_bbox):
-                    f.seek(next)
-                    return None
-        # Read a single Z value
-        if shapeType == 11:
-            record.z = list(unpack("<d", f.read(8)))
-        # Read a single M value
-        if shapeType in (21,11):
-            if next - f.tell() >= 8:
-                (m,) = unpack("<d", f.read(8))
-            else:
-                m = NODATA
-            # Measure values less than -10e38 are nodata values according to the spec
-            if m > NODATA:
-                record.m = [m]
-            else:
-                record.m = [None]
-        # Seek to the end of this record as defined by the record header because
-        # the shapefile spec doesn't require the actual content to meet the header
-        # definition. Probably allowed for lazy feature deletion.
-        f.seek(next)
-        return record
-
-    def __shxHeader(self):
-        """Reads the header information from a .shx file."""
-        shx = self.shx
-        if not shx:
-            raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found")
-        # File length (16-bit word * 2 = bytes) - header length
-        shx.seek(24)
-        shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100
-        self.numShapes = shxRecordLength // 8
-
-    def __shxOffsets(self):
-        '''Reads the shape offset positions from a .shx file'''
-        shx = self.shx
-        if not shx:
-            raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found")
-        # Jump to the first record.
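The seek that follows jumps past the 100-byte header; everything after it is fixed-width pairs, which is why the offsets can be slurped in a single read. The .shx layout that __shxHeader above and the loop below rely on, as a standalone sketch (read_shx_offsets is an illustrative name):

    import struct

    def read_shx_offsets(shx_path):
        # each .shx record is two big-endian int32s, offset and content
        # length, both in 16-bit words (so multiply by 2 for bytes)
        with open(shx_path, 'rb') as f:
            f.seek(24)
            file_len_words, = struct.unpack('>i', f.read(4))
            num_shapes = (file_len_words * 2 - 100) // 8
            f.seek(100)
            offsets = []
            for _ in range(num_shapes):
                offset_words, _len_words = struct.unpack('>2i', f.read(8))
                offsets.append(offset_words * 2)   # byte offset into the .shp
            return offsets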
- shx.seek(100) - # Each index record consists of two nrs, we only want the first one - shxRecords = _Array('i', shx.read(2 * self.numShapes * 4) ) - if sys.byteorder != 'big': - shxRecords.byteswap() - self._offsets = [2 * el for el in shxRecords[::2]] - - def __shapeIndex(self, i=None): - """Returns the offset in a .shp file for a shape based on information - in the .shx index file.""" - shx = self.shx - # Return None if no shx or no index requested - if not shx or i == None: - return None - # At this point, we know the shx file exists - if not self._offsets: - self.__shxOffsets() - return self._offsets[i] - - def shape(self, i=0, bbox=None): - """Returns a shape object for a shape in the geometry - record file. - If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), - returns None if the shape is not within that region. - """ - shp = self.__getFileObj(self.shp) - i = self.__restrictIndex(i) - offset = self.__shapeIndex(i) - if not offset: - # Shx index not available. - # Determine length of shp file - shp.seek(0,2) - shpLength = shp.tell() - shp.seek(100) - # Do a fast shape iteration until the requested index or end of file. - unpack = Struct('>2i').unpack - _i = 0 - offset = shp.tell() - while offset < shpLength: - if _i == i: - # Reached the requested index, exit loop with the offset value - break - # Unpack the shape header only - (recNum, recLength) = unpack(shp.read(8)) - # Jump to next shape position - offset += 8 + (2 * recLength) - shp.seek(offset) - _i += 1 - # If the index was not found, it likely means the .shp file is incomplete - if _i != i: - raise ShapefileException('Shape index {} is out of bounds; the .shp file only contains {} shapes'.format(i, _i)) - - # Seek to the offset and read the shape - shp.seek(offset) - return self.__shape(oid=i, bbox=bbox) - - def shapes(self, bbox=None): - """Returns all shapes in a shapefile. - To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - shapes = Shapes() - shapes.extend(self.iterShapes(bbox=bbox)) - return shapes - - def iterShapes(self, bbox=None): - """Returns a generator of shapes in a shapefile. Useful - for handling large shapefiles. - To only read shapes within a given spatial region, specify the 'bbox' - arg as a list or tuple of xmin,ymin,xmax,ymax. - """ - shp = self.__getFileObj(self.shp) - # Found shapefiles which report incorrect - # shp file length in the header. Can't trust - # that so we seek to the end of the file - # and figure it out. - shp.seek(0,2) - shpLength = shp.tell() - shp.seek(100) - - if self.numShapes: - # Iterate exactly the number of shapes from shx header - for i in xrange(self.numShapes): - # MAYBE: check if more left of file or exit early? - shape = self.__shape(oid=i, bbox=bbox) - if shape: - yield shape - else: - # No shx file, unknown nr of shapes - # Instead iterate until reach end of file - # Collect the offset indices during iteration - i = 0 - offsets = [] - pos = shp.tell() - while pos < shpLength: - offsets.append(pos) - shape = self.__shape(oid=i, bbox=bbox) - pos = shp.tell() - if shape: - yield shape - i += 1 - # Entire shp file consumed - # Update the number of shapes and list of offsets - assert i == len(offsets) - self.numShapes = i - self._offsets = offsets - - def __dbfHeader(self): - """Reads a dbf header. 
Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" - if not self.dbf: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no dbf file found)") - dbf = self.dbf - # read relevant header parts - dbf.seek(0) - self.numRecords, self.__dbfHdrLength, self.__recordLength = \ - unpack(" 0: - px, py = list(zip(*s.points))[:2] - x.extend(px) - y.extend(py) - else: - # this should not happen. - # any shape that is not null should have at least one point, and only those should be sent here. - # could also mean that earlier code failed to add points to a non-null shape. - raise Exception("Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." % s.shapeType) - bbox = [min(x), min(y), max(x), max(y)] - # update global - if self._bbox: - # compare with existing - self._bbox = [min(bbox[0],self._bbox[0]), min(bbox[1],self._bbox[1]), max(bbox[2],self._bbox[2]), max(bbox[3],self._bbox[3])] - else: - # first time bbox is being set - self._bbox = bbox - return bbox - - def __zbox(self, s): - z = [] - for p in s.points: - try: - z.append(p[2]) - except IndexError: - # point did not have z value - # setting it to 0 is probably ok, since it means all are on the same elevation - z.append(0) - zbox = [min(z), max(z)] - # update global - if self._zbox: - # compare with existing - self._zbox = [min(zbox[0],self._zbox[0]), max(zbox[1],self._zbox[1])] - else: - # first time zbox is being set - self._zbox = zbox - return zbox - - def __mbox(self, s): - mpos = 3 if s.shapeType in (11,13,15,18,31) else 2 - m = [] - for p in s.points: - try: - if p[mpos] is not None: - # mbox should only be calculated on valid m values - m.append(p[mpos]) - except IndexError: - # point did not have m value so is missing - # mbox should only be calculated on valid m values - pass - if not m: - # only if none of the shapes had m values, should mbox be set to missing m values - m.append(NODATA) - mbox = [min(m), max(m)] - # update global - if self._mbox: - # compare with existing - self._mbox = [min(mbox[0],self._mbox[0]), max(mbox[1],self._mbox[1])] - else: - # first time mbox is being set - self._mbox = mbox - return mbox - - @property - def shapeTypeName(self): - return SHAPETYPE_LOOKUP[self.shapeType] - - def bbox(self): - """Returns the current bounding box for the shapefile which is - the lower-left and upper-right corners. It does not contain the - elevation or measure extremes.""" - return self._bbox - - def zbox(self): - """Returns the current z extremes for the shapefile.""" - return self._zbox - - def mbox(self): - """Returns the current m extremes for the shapefile.""" - return self._mbox - - def __shapefileHeader(self, fileObj, headerType='shp'): - """Writes the specified header type to the specified file-like object. 
-        Several of the shapefile formats are so similar that a single generic
-        method to read or write them is warranted."""
-        f = self.__getFileObj(fileObj)
-        f.seek(0)
-        # File code, Unused bytes
-        f.write(pack(">6i", 9994,0,0,0,0,0))
-        # File length (Bytes / 2 = 16-bit words)
-        if headerType == 'shp':
-            f.write(pack(">i", self.__shpFileLength()))
-        elif headerType == 'shx':
-            f.write(pack('>i', ((100 + (self.shpNum * 8)) // 2)))
-        # Version, Shape type
-        if self.shapeType is None:
-            self.shapeType = NULL
-        f.write(pack("<2i", 1000, self.shapeType))
-        # The shapefile's bounding box (lower left, upper right)
-        if self.shapeType != 0:
-            try:
-                bbox = self.bbox()
-                if bbox is None:
-                    # The bbox is initialized with None, so this would mean the shapefile contains no valid geometries.
-                    # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'.
-                    # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions.
-                    # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles.
-                    bbox = [0,0,0,0]
-                f.write(pack("<4d", *bbox))
-            except error:
-                raise ShapefileException("Failed to write shapefile bounding box. Floats required.")
-        else:
-            f.write(pack("<4d", 0,0,0,0))
-        # Elevation
-        if self.shapeType in (11,13,15,18):
-            # Z values are present in Z type
-            zbox = self.zbox()
-            if zbox is None:
-                # means we have empty shapefile/only null geoms (see commentary on bbox above)
-                zbox = [0,0]
-        else:
-            # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s
-            zbox = [0,0]
-        # Measure
-        if self.shapeType in (11,13,15,18,21,23,25,28,31):
-            # M values are present in M or Z type
-            mbox = self.mbox()
-            if mbox is None:
-                # means we have empty shapefile/only null geoms (see commentary on bbox above)
-                mbox = [0,0]
-        else:
-            # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s
-            mbox = [0,0]
-        # Try writing
-        try:
-            f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1]))
-        except error:
-            raise ShapefileException("Failed to write shapefile elevation and measure values. Floats required.")
-
-    def __dbfHeader(self):
-        """Writes the dbf header and field descriptors."""
-        f = self.__getFileObj(self.dbf)
-        f.seek(0)
-        version = 3
-        year, month, day = time.localtime()[:3]
-        year -= 1900
-        # Get all fields, ignoring DeletionFlag if specified
-        fields = [field for field in self.fields if field[0] != 'DeletionFlag']
-        # Ensure has at least one field
-        if not fields:
-            raise ShapefileException("Shapefile dbf file must contain at least one field.")
-        numRecs = self.recNum
-        numFields = len(fields)
-        headerLength = numFields * 32 + 33
-        if headerLength >= 65535:
-            raise ShapefileException(
-                    "Shapefile dbf header length exceeds maximum length.")
-        recordLength = sum([int(field[2]) for field in fields]) + 1
-        header = pack('<BBBBLHH20x', version, year, month, day, numRecs,
-                headerLength, recordLength)
-        f.write(header)
-        # Field descriptors
-        for field in fields:
-            name, fieldType, size, decimal = field
-            name = b(name, self.encoding, self.encodingErrors)
-            name = name.replace(b' ', b'_')
-            name = name[:10].ljust(11).replace(b' ', b'\x00')
-            fieldType = b(fieldType, 'ascii')
-            size = int(size)
-            fld = pack('<11sc4xBB14x', name, fieldType, size, decimal)
-            f.write(fld)
-        # Terminator
-        f.write(b'\r')
-
-    def shape(self, s):
-        # Balance if already not balanced
-        if self.autoBalance and self.recNum < self.shpNum:
-            self.balance()
-        # Check is shape or import from geojson
-        if not isinstance(s, Shape):
-            if hasattr(s, "__geo_interface__"):
-                s = s.__geo_interface__
-            if isinstance(s, dict):
-                s = Shape._from_geojson(s)
-            else:
-                raise Exception("Can only write Shape objects, GeoJSON dictionaries, "
-                                "or objects with the __geo_interface__, "
-                                "not: %r" % s)
-        # Write to file
-        offset,length = self.__shpRecord(s)
-        if self.shx:
-            self.__shxRecord(offset, length)
-
-    def __shpRecord(self, s):
-        f = self.__getFileObj(self.shp)
-        offset = f.tell()
-        # Record number, Content length place holder
-        self.shpNum += 1
-        f.write(pack(">2i", self.shpNum, 0))
-        start = f.tell()
-        # Shape Type
-        if self.shapeType is None and s.shapeType != NULL:
-            self.shapeType = s.shapeType
-        if s.shapeType != NULL and s.shapeType != self.shapeType:
-            raise Exception("The shape's type (%s) must match the type of the shapefile (%s)." % (s.shapeType, self.shapeType))
-        f.write(pack("<i", s.shapeType))
-
-        # For point just update bbox of the whole shapefile
-        if s.shapeType in (1,11,21):
-            self.__bbox(s)
-        # All shape types capable of having a bounding box
-        if s.shapeType in (3,5,8,13,15,18,23,25,28,31):
-            try:
-                f.write(pack("<4d", *self.__bbox(s)))
-            except error:
-                raise ShapefileException("Failed to write bounding box for record %s. Expected floats." % self.shpNum)
-        # Shape types with parts
-        if s.shapeType in (3,5,13,15,23,25,31):
-            # Number of parts
-            f.write(pack("<i", len(s.parts)))
-        # Shape types with multiple points per record
-        if s.shapeType in (3,5,8,13,15,18,23,25,28,31):
-            # Number of points
-            f.write(pack("<i", len(s.points)))
-        # Write part indexes
-        if s.shapeType in (3,5,13,15,23,25,31):
-            for p in s.parts:
-                f.write(pack("<i", p))
-        # Part types for Multipatch (31)
-        if s.shapeType == 31:
-            for pt in s.partTypes:
-                f.write(pack("<i", pt))
-        # Write points for multiple-point records
-        if s.shapeType in (3,5,8,13,15,18,23,25,28,31):
-            try:
-                [f.write(pack("<2d", *p[:2])) for p in s.points]
-            except error:
-                raise ShapefileException("Failed to write points for record %s. Expected floats." % self.shpNum)
-        # Write z extremes and values
-        # Note: missing z values are autoset to 0, but not sure if this is ideal.
-        if s.shapeType in (13,15,18,31):
-            try:
-                f.write(pack("<2d", *self.__zbox(s)))
-            except error:
-                raise ShapefileException("Failed to write elevation extremes for record %s. Expected floats." % self.shpNum)
-            try:
-                if hasattr(s,"z"):
-                    # if z values are stored in attribute
-                    f.write(pack("<%sd" % len(s.z), *s.z))
-                else:
-                    # if z values are stored as 3rd dimension
-                    [f.write(pack("<d", p[2] if len(p) > 2 else 0)) for p in s.points]
-            except error:
-                raise ShapefileException("Failed to write elevation values for record %s. Expected floats." % self.shpNum)
-        # Write m extremes and values
-        # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA
-        # Note: missing m values are autoset to NODATA.
-        if s.shapeType in (13,15,18,23,25,28,31):
-            try:
-                f.write(pack("<2d", *self.__mbox(s)))
-            except error:
-                raise ShapefileException("Failed to write measure extremes for record %s. Expected floats" % self.shpNum)
-            try:
-                if hasattr(s,"m"):
-                    # if m values are stored in attribute
-                    f.write(pack("<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m]))
-                else:
-                    # if m values are stored as 3rd/4th dimension
-                    # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m)
-                    mpos = 3 if s.shapeType in (13,15,18,31) else 2
-                    [f.write(pack("<d", p[mpos] if len(p) > mpos and p[mpos] is not None else NODATA)) for p in s.points]
-            except error:
-                raise ShapefileException("Failed to write measure values for record %s. Expected floats" % self.shpNum)
-        # Write a single point
-        if s.shapeType in (1,11,21):
-            try:
-                f.write(pack("<2d", s.points[0][0], s.points[0][1]))
-            except error:
-                raise ShapefileException("Failed to write point for record %s. Expected floats." % self.shpNum)
-        # Write a single Z value
-        # Note: missing z values are autoset to 0, but not sure if this is ideal.
-        if s.shapeType == 11:
-            # update the global z box
-            self.__zbox(s)
-            # then write value
-            if hasattr(s, "z"):
-                # if z values are stored in attribute
-                try:
-                    if not s.z:
-                        s.z = (0,)
-                    f.write(pack("<d", s.z[0]))
-                except error:
-                    raise ShapefileException("Failed to write elevation value for record %s. Expected floats." % self.shpNum)
-            else:
-                # if z values are stored as 3rd dimension
-                try:
-                    f.write(pack("<d", s.points[0][2] if len(s.points[0]) > 2 else 0))
-                except error:
-                    raise ShapefileException("Failed to write elevation value for record %s. Expected floats." % self.shpNum)
-        # Write a single M value
-        # Note: missing m values are autoset to NODATA.
-        if s.shapeType in (11,21):
-            # update the global m box
-            self.__mbox(s)
-            # then write value
-            if hasattr(s, "m"):
-                # if m values are stored in attribute
-                try:
-                    if not s.m or s.m[0] is None:
-                        s.m = (NODATA,)
-                    f.write(pack("<1d", s.m[0]))
-                except error:
-                    raise ShapefileException("Failed to write measure value for record %s. Expected floats." % self.shpNum)
-            else:
-                # if m values are stored as 3rd/4th dimension
-                # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m)
-                try:
-                    mpos = 3 if s.shapeType == 11 else 2
-                    f.write(pack("<1d", s.points[0][mpos] if len(s.points[0]) > mpos and s.points[0][mpos] is not None else NODATA))
-                except error:
-                    raise ShapefileException("Failed to write measure value for record %s. Expected floats." % self.shpNum)
-        # Finalize record length as 16-bit words
-        finish = f.tell()
-        length = (finish - start) // 2
-        # start - 4 bytes is the content length field
-        f.seek(start-4)
-        f.write(pack(">i", length))
-        f.seek(finish)
-        return offset,length
-
-    def __shxRecord(self, offset, length):
-        """Writes the shx records."""
-        f = self.__getFileObj(self.shx)
-        try:
-            f.write(pack(">i", offset // 2))
-        except error:
-            raise ShapefileException('The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones.')
-        f.write(pack(">i", length))
-
-    def record(self, *recordList, **recordDict):
-        """Creates a dbf attribute record. You can submit either a sequence of
-        field values or keyword arguments of field names and values. Before
-        adding records you must add fields for the record values using the
-        field() method. If the record values exceed the number of fields the
-        extra ones won't be added.
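record() pairs with the field() method defined further down; assuming the usual Writer constructor from this module, a typical write session looks roughly like this (the basename 'towns' and the field layout are made up for illustration):

    w = Writer('towns')            # produces towns.shp, towns.shx, towns.dbf
    w.field('NAME', 'C', 40)       # character field
    w.field('POP', 'N', 10, 0)     # numeric field, no decimals
    w.point(10.75, 59.91)
    w.record(NAME='Oslo', POP=709000)
    w.close()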
In the case of using keyword arguments to specify - field/value pairs only fields matching the already registered fields - will be added.""" - # Balance if already not balanced - if self.autoBalance and self.recNum > self.shpNum: - self.balance() - - fieldCount = sum((1 for field in self.fields if field[0] != 'DeletionFlag')) - if recordList: - record = list(recordList) - while len(record) < fieldCount: - record.append("") - elif recordDict: - record = [] - for field in self.fields: - if field[0] == 'DeletionFlag': - continue # ignore deletionflag field in case it was specified - if field[0] in recordDict: - val = recordDict[field[0]] - if val is None: - record.append("") - else: - record.append(val) - else: - record.append("") # need empty value for missing dict entries - else: - # Blank fields for empty record - record = ["" for _ in range(fieldCount)] - self.__dbfRecord(record) - - def __dbfRecord(self, record): - """Writes the dbf records.""" - f = self.__getFileObj(self.dbf) - if self.recNum == 0: - # first records, so all fields should be set - # allowing us to write the dbf header - # cannot change the fields after this point - self.__dbfHeader() - # first byte of the record is deletion flag, always disabled - f.write(b' ') - # begin - self.recNum += 1 - fields = (field for field in self.fields if field[0] != 'DeletionFlag') # ignore deletionflag field in case it was specified - for (fieldName, fieldType, size, deci), value in zip(fields, record): - # write - fieldType = fieldType.upper() - size = int(size) - if fieldType in ("N","F"): - # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. - if value in MISSING: - value = b"*"*size # QGIS NULL - elif not deci: - # force to int - try: - # first try to force directly to int. - # forcing a large int to float and back to int - # will lose information and result in wrong nr. - value = int(value) - except ValueError: - # forcing directly to int failed, so was probably a float. - value = int(float(value)) - value = format(value, "d")[:size].rjust(size) # caps the size if exceeds the field size - else: - value = float(value) - value = format(value, ".%sf"%deci)[:size].rjust(size) # caps the size if exceeds the field size - elif fieldType == "D": - # date: 8 bytes - date stored as a string in the format YYYYMMDD. - if isinstance(value, date): - value = '{:04d}{:02d}{:02d}'.format(value.year, value.month, value.day) - elif isinstance(value, list) and len(value) == 3: - value = '{:04d}{:02d}{:02d}'.format(*value) - elif value in MISSING: - value = b'0' * 8 # QGIS NULL for date type - elif is_string(value) and len(value) == 8: - pass # value is already a date string - else: - raise ShapefileException("Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value.") - elif fieldType == 'L': - # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
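Before the logical branch continues below, note what the numeric branch above guarantees: N/F values become plain ASCII, right-justified and blank-padded to the declared width, with over-long values truncated. In isolation (format_numeric is an illustrative name, not part of the patch):

    def format_numeric(value, size, deci):
        # dbf N/F fields: fixed width, right-justified, blank-padded
        if deci:
            text = format(float(value), '.%df' % deci)
        else:
            text = format(int(value), 'd')
        return text[:size].rjust(size)

    assert format_numeric(3.14159, 8, 2) == '    3.14'
    assert format_numeric(42, 5, 0) == '   42'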
- if value in MISSING: - value = b' ' # missing is set to space - elif value in [True,1]: - value = b'T' - elif value in [False,0]: - value = b'F' - else: - value = b' ' # unknown is set to space - else: - # anything else is forced to string, truncated to the length of the field - value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) - if not isinstance(value, bytes): - # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - value = b(value, 'ascii', self.encodingErrors) # should be default ascii encoding - if len(value) != size: - raise ShapefileException( - "Shapefile Writer unable to pack incorrect sized value" - " (size %d) into field '%s' (size %d)." % (len(value), fieldName, size)) - f.write(value) - - def balance(self): - """Adds corresponding empty attributes or null geometry records depending - on which type of record was created to make sure all three files - are in synch.""" - while self.recNum > self.shpNum: - self.null() - while self.recNum < self.shpNum: - self.record() - - - def null(self): - """Creates a null shape.""" - self.shape(Shape(NULL)) - - - def point(self, x, y): - """Creates a POINT shape.""" - shapeType = POINT - pointShape = Shape(shapeType) - pointShape.points.append([x, y]) - self.shape(pointShape) - - def pointm(self, x, y, m=None): - """Creates a POINTM shape. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTM - pointShape = Shape(shapeType) - pointShape.points.append([x, y, m]) - self.shape(pointShape) - - def pointz(self, x, y, z=0, m=None): - """Creates a POINTZ shape. - If the z (elevation) value is not set, it defaults to 0. - If the m (measure) value is not set, it defaults to NoData.""" - shapeType = POINTZ - pointShape = Shape(shapeType) - pointShape.points.append([x, y, z, m]) - self.shape(pointShape) - - - def multipoint(self, points): - """Creates a MULTIPOINT shape. - Points is a list of xy values.""" - shapeType = MULTIPOINT - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointm(self, points): - """Creates a MULTIPOINTM shape. - Points is a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTM - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - def multipointz(self, points): - """Creates a MULTIPOINTZ shape. - Points is a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPOINTZ - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method - self._shapeparts(parts=points, shapeType=shapeType) - - - def line(self, lines): - """Creates a POLYLINE shape. - Lines is a collection of lines, each made up of a list of xy values.""" - shapeType = POLYLINE - self._shapeparts(parts=lines, shapeType=shapeType) - - def linem(self, lines): - """Creates a POLYLINEM shape. - Lines is a collection of lines, each made up of a list of xym values. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEM - self._shapeparts(parts=lines, shapeType=shapeType) - - def linez(self, lines): - """Creates a POLYLINEZ shape. 
- Lines is a collection of lines, each made up of a list of xyzm values. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYLINEZ - self._shapeparts(parts=lines, shapeType=shapeType) - - - def poly(self, polys): - """Creates a POLYGON shape. - Polys is a collection of polygons, each made up of a list of xy values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction.""" - shapeType = POLYGON - self._shapeparts(parts=polys, shapeType=shapeType) - - def polym(self, polys): - """Creates a POLYGONM shape. - Polys is a collection of polygons, each made up of a list of xym values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONM - self._shapeparts(parts=polys, shapeType=shapeType) - - def polyz(self, polys): - """Creates a POLYGONZ shape. - Polys is a collection of polygons, each made up of a list of xyzm values. - Note that for ordinary polygons the coordinates must run in a clockwise direction. - If some of the polygons are holes, these must run in a counterclockwise direction. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = POLYGONZ - self._shapeparts(parts=polys, shapeType=shapeType) - - - def multipatch(self, parts, partTypes): - """Creates a MULTIPATCH shape. - Parts is a collection of 3D surface patches, each made up of a list of xyzm values. - PartTypes is a list of types that define each of the surface patches. - The types can be any of the following module constants: TRIANGLE_STRIP, - TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. - If the z (elevation) value is not included, it defaults to 0. - If the m (measure) value is not included, it defaults to None (NoData).""" - shapeType = MULTIPATCH - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - polyShape.partTypes = partTypes - # write the shape - self.shape(polyShape) - - - def _shapeparts(self, parts, shapeType): - """Internal method for adding a shape that has multiple collections of points (parts): - lines, polygons, and multipoint shapes. 
- """ - polyShape = Shape(shapeType) - polyShape.parts = [] - polyShape.points = [] - # Make sure polygon rings (parts) are closed - if shapeType in (5,15,25,31): - for part in parts: - if part[0] != part[-1]: - part.append(part[0]) - # Add points and part indexes - for part in parts: - # set part index position - polyShape.parts.append(len(polyShape.points)) - # add points - for point in part: - # Ensure point is list - if not isinstance(point, list): - point = list(point) - polyShape.points.append(point) - # write the shape - self.shape(polyShape) - - def field(self, name, fieldType="C", size="50", decimal=0): - """Adds a dbf field descriptor to the shapefile.""" - if fieldType == "D": - size = "8" - decimal = 0 - elif fieldType == "L": - size = "1" - decimal = 0 - if len(self.fields) >= 2046: - raise ShapefileException( - "Shapefile Writer reached maximum number of fields: 2046.") - self.fields.append((name, fieldType, size, decimal)) - - -# Begin Testing -def test(**kwargs): - import doctest - doctest.NORMALIZE_WHITESPACE = 1 - verbosity = kwargs.get('verbose', 0) - if verbosity == 0: - print('Running doctests...') - - # ignore py2-3 unicode differences - import re - class Py23DocChecker(doctest.OutputChecker): - def check_output(self, want, got, optionflags): - if sys.version_info[0] == 2: - got = re.sub("u'(.*?)'", "'\\1'", got) - got = re.sub('u"(.*?)"', '"\\1"', got) - res = doctest.OutputChecker.check_output(self, want, got, optionflags) - return res - def summarize(self): - doctest.OutputChecker.summarize(True) - - # run tests - runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) - with open("README.md","rb") as fobj: - test = doctest.DocTestParser().get_doctest(string=fobj.read().decode("utf8").replace('\r\n','\n'), globs={}, name="README", filename="README.md", lineno=0) - failure_count, test_count = runner.run(test) - - # print results - if verbosity: - runner.summarize(True) - else: - if failure_count == 0: - print('All test passed successfully') - elif failure_count > 0: - runner.summarize(verbosity) - - return failure_count - -if __name__ == "__main__": - """ - Doctests are contained in the file 'README.md', and are tested using the built-in - testing libraries. - """ - failure_count = test() - sys.exit(failure_count) +""" +shapefile.py +Provides read and write support for ESRI Shapefiles. 
+authors: jlawhead<at>geospatialpython.com
+maintainer: karim.bahgat.norway<at>gmail.com
+Compatible with Python versions 2.7-3.x
+"""
+
+__version__ = "2.3.1"
+
+from struct import pack, unpack, calcsize, error, Struct
+import os
+import sys
+import time
+import array
+import tempfile
+import logging
+import io
+from datetime import date
+import zipfile
+
+# Create named logger
+logger = logging.getLogger(__name__)
+
+
+# Module settings
+VERBOSE = True
+
+# Constants for shape types
+NULL = 0
+POINT = 1
+POLYLINE = 3
+POLYGON = 5
+MULTIPOINT = 8
+POINTZ = 11
+POLYLINEZ = 13
+POLYGONZ = 15
+MULTIPOINTZ = 18
+POINTM = 21
+POLYLINEM = 23
+POLYGONM = 25
+MULTIPOINTM = 28
+MULTIPATCH = 31
+
+SHAPETYPE_LOOKUP = {
+    0: 'NULL',
+    1: 'POINT',
+    3: 'POLYLINE',
+    5: 'POLYGON',
+    8: 'MULTIPOINT',
+    11: 'POINTZ',
+    13: 'POLYLINEZ',
+    15: 'POLYGONZ',
+    18: 'MULTIPOINTZ',
+    21: 'POINTM',
+    23: 'POLYLINEM',
+    25: 'POLYGONM',
+    28: 'MULTIPOINTM',
+    31: 'MULTIPATCH'}
+
+TRIANGLE_STRIP = 0
+TRIANGLE_FAN = 1
+OUTER_RING = 2
+INNER_RING = 3
+FIRST_RING = 4
+RING = 5
+
+PARTTYPE_LOOKUP = {
+    0: 'TRIANGLE_STRIP',
+    1: 'TRIANGLE_FAN',
+    2: 'OUTER_RING',
+    3: 'INNER_RING',
+    4: 'FIRST_RING',
+    5: 'RING'}
+
+
+# Python 2-3 handling
+
+PYTHON3 = sys.version_info[0] == 3
+
+if PYTHON3:
+    xrange = range
+    izip = zip
+
+    from urllib.parse import urlparse, urlunparse
+    from urllib.error import HTTPError
+    from urllib.request import urlopen, Request
+
+else:
+    from itertools import izip
+
+    from urlparse import urlparse, urlunparse
+    from urllib2 import HTTPError
+    from urllib2 import urlopen, Request
+
+
+# Helpers
+
+MISSING = [None,'']
+NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values.
+
+if PYTHON3:
+    def b(v, encoding='utf-8', encodingErrors='strict'):
+        if isinstance(v, str):
+            # For python 3 encode str to bytes.
+            return v.encode(encoding, encodingErrors)
+        elif isinstance(v, bytes):
+            # Already bytes.
+            return v
+        elif v is None:
+            # Since we're dealing with text, interpret None as ""
+            return b""
+        else:
+            # Force string representation.
+            return str(v).encode(encoding, encodingErrors)
+
+    def u(v, encoding='utf-8', encodingErrors='strict'):
+        if isinstance(v, bytes):
+            # For python 3 decode bytes to str.
+            return v.decode(encoding, encodingErrors)
+        elif isinstance(v, str):
+            # Already str.
+            return v
+        elif v is None:
+            # Since we're dealing with text, interpret None as ""
+            return ""
+        else:
+            # Force string representation.
+            return bytes(v).decode(encoding, encodingErrors)
+
+    def is_string(v):
+        return isinstance(v, str)
+
+else:
+    def b(v, encoding='utf-8', encodingErrors='strict'):
+        if isinstance(v, unicode):
+            # For python 2 encode unicode to bytes.
+            return v.encode(encoding, encodingErrors)
+        elif isinstance(v, bytes):
+            # Already bytes.
+            return v
+        elif v is None:
+            # Since we're dealing with text, interpret None as ""
+            return ""
+        else:
+            # Force string representation.
+            return unicode(v).encode(encoding, encodingErrors)
+
+    def u(v, encoding='utf-8', encodingErrors='strict'):
+        if isinstance(v, bytes):
+            # For python 2 decode bytes to unicode.
+            return v.decode(encoding, encodingErrors)
+        elif isinstance(v, unicode):
+            # Already unicode.
+            return v
+        elif v is None:
+            # Since we're dealing with text, interpret None as ""
+            return u""
+        else:
+            # Force string representation.
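+            # (note: in python 2, bytes is simply an alias of str, so the
+            # bytes(v) call below stringifies arbitrary objects before decoding)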
+ return bytes(v).decode(encoding, encodingErrors) + + def is_string(v): + return isinstance(v, basestring) + +if sys.version_info[0:2] >= (3, 6): + def pathlike_obj(path): + if isinstance(path, os.PathLike): + return os.fsdecode(path) + else: + return path +else: + def pathlike_obj(path): + if is_string(path): + return path + elif hasattr(path, "__fspath__"): + return path.__fspath__() + else: + try: + return str(path) + except: + return path + + +# Begin + +class _Array(array.array): + """Converts python tuples to lists of the appropriate type. + Used to unpack different shapefile header parts.""" + def __repr__(self): + return str(self.tolist()) + +def signed_area(coords, fast=False): + """Return the signed area enclosed by a ring using the linear time + algorithm. A value >= 0 indicates a counter-clockwise oriented ring. + A faster version is possible by setting 'fast' to True, which returns + 2x the area, e.g. if you're only interested in the sign of the area. + """ + xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values + xs.append(xs[1]) + ys.append(ys[1]) + area2 = sum(xs[i]*(ys[i+1]-ys[i-1]) for i in range(1, len(coords))) + if fast: + return area2 + else: + return area2 / 2.0 + +def is_cw(coords): + """Returns True if a polygon ring has clockwise orientation, determined + by a negatively signed area. + """ + area2 = signed_area(coords, fast=True) + return area2 < 0 + +def rewind(coords): + """Returns the input coords in reversed order. + """ + return list(reversed(coords)) + +def ring_bbox(coords): + """Calculates and returns the bounding box of a ring. + """ + xs,ys = zip(*coords) + bbox = min(xs),min(ys),max(xs),max(ys) + return bbox + +def bbox_overlap(bbox1, bbox2): + """Tests whether two bounding boxes overlap, returning a boolean + """ + xmin1,ymin1,xmax1,ymax1 = bbox1 + xmin2,ymin2,xmax2,ymax2 = bbox2 + overlap = (xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2) + return overlap + +def bbox_contains(bbox1, bbox2): + """Tests whether bbox1 fully contains bbox2, returning a boolean + """ + xmin1,ymin1,xmax1,ymax1 = bbox1 + xmin2,ymin2,xmax2,ymax2 = bbox2 + contains = (xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2) + return contains + +def ring_contains_point(coords, p): + """Fast point-in-polygon crossings algorithm, MacMartin optimization. + + Adapted from code by Eric Haynes + http://www.realtimerendering.com/resources/GraphicsGems//gemsiv/ptpoly_haines/ptinpoly.c + + Original description: + Shoot a test ray along +X axis. The strategy, from MacMartin, is to + compare vertex Y values to the testing point's Y and quickly discard + edges which are entirely to one side of the test ray. + """ + tx,ty = p + + # get initial test bit for above/below X axis + vtx0 = coords[0] + yflag0 = ( vtx0[1] >= ty ) + + inside_flag = False + for vtx1 in coords[1:]: + yflag1 = ( vtx1[1] >= ty ) + # check if endpoints straddle (are on opposite sides) of X axis + # (i.e. the Y's differ); if so, +X ray could intersect this edge. + if yflag0 != yflag1: + xflag0 = ( vtx0[0] >= tx ) + # check if endpoints are on same side of the Y axis (i.e. X's + # are the same); if so, it's easy to test if edge hits or misses. + if xflag0 == ( vtx1[0] >= tx ): + # if edge's X values both right of the point, must hit + if xflag0: + inside_flag = not inside_flag + else: + # compute intersection of pgon segment with +X ray, note + # if >= point's X; if so, the ray hits it. 
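+                    # the test below linearly interpolates the edge between
+                    # vtx0 and vtx1 to find its x-coordinate at height ty,
+                    # and flips the inside flag if that crossing lies on or
+                    # to the right of the test point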
+ if ( vtx1[0] - (vtx1[1]-ty) * ( vtx0[0]-vtx1[0]) / (vtx0[1]-vtx1[1]) ) >= tx: + inside_flag = not inside_flag + + # move to next pair of vertices, retaining info as possible + yflag0 = yflag1 + vtx0 = vtx1 + + return inside_flag + +def ring_sample(coords, ccw=False): + """Return a sample point guaranteed to be within a ring, by efficiently + finding the first centroid of a coordinate triplet whose orientation + matches the orientation of the ring and passes the point-in-ring test. + The orientation of the ring is assumed to be clockwise, unless ccw + (counter-clockwise) is set to True. + """ + triplet = [] + def itercoords(): + # iterate full closed ring + for p in coords: + yield p + # finally, yield the second coordinate to the end to allow checking the last triplet + yield coords[1] + + for p in itercoords(): + # add point to triplet (but not if duplicate) + if p not in triplet: + triplet.append(p) + + # new triplet, try to get sample + if len(triplet) == 3: + # check that triplet does not form a straight line (not a triangle) + is_straight_line = (triplet[0][1] - triplet[1][1]) * (triplet[0][0] - triplet[2][0]) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) + if not is_straight_line: + # get triplet orientation + closed_triplet = triplet + [triplet[0]] + triplet_ccw = not is_cw(closed_triplet) + # check that triplet has the same orientation as the ring (means triangle is inside the ring) + if ccw == triplet_ccw: + # get triplet centroid + xs,ys = zip(*triplet) + xmean,ymean = sum(xs) / 3.0, sum(ys) / 3.0 + # check that triplet centroid is truly inside the ring + if ring_contains_point(coords, (xmean,ymean)): + return xmean,ymean + + # failed to get sample point from this triplet + # remove oldest triplet coord to allow iterating to next triplet + triplet.pop(0) + + else: + raise Exception('Unexpected error: Unable to find a ring sample point.') + +def ring_contains_ring(coords1, coords2): + '''Returns True if all vertexes in coords2 are fully inside coords1. + ''' + return all((ring_contains_point(coords1, p2) for p2 in coords2)) + +def organize_polygon_rings(rings, return_errors=None): + '''Organize a list of coordinate rings into one or more polygons with holes. + Returns a list of polygons, where each polygon is composed of a single exterior + ring, and one or more interior holes. If a return_errors dict is provided (optional), + any errors encountered will be added to it. + + Rings must be closed, and cannot intersect each other (non-self-intersecting polygon). + Rings are determined as exteriors if they run in clockwise direction, or interior + holes if they run in counter-clockwise direction. This method is used to construct + GeoJSON (multi)polygons from the shapefile polygon shape type, which does not + explicitly store the structure of the polygons beyond exterior/interior ring orientation. 
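+
+    A minimal illustration (coordinates invented for this example; the exterior
+    ring runs clockwise, the hole counter-clockwise):
+
+    >>> ext = [(0,0), (0,10), (10,10), (10,0), (0,0)]
+    >>> hole = [(2,2), (4,2), (4,4), (2,4), (2,2)]
+    >>> organize_polygon_rings([ext, hole]) == [[ext, hole]]
+    True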
+ ''' + # first iterate rings and classify as exterior or hole + exteriors = [] + holes = [] + for ring in rings: + # shapefile format defines a polygon as a sequence of rings + # where exterior rings are clockwise, and holes counterclockwise + if is_cw(ring): + # ring is exterior + exteriors.append(ring) + else: + # ring is a hole + holes.append(ring) + + # if only one exterior, then all holes belong to that exterior + if len(exteriors) == 1: + # exit early + poly = [exteriors[0]] + holes + polys = [poly] + return polys + + # multiple exteriors, ie multi-polygon, have to group holes with correct exterior + # shapefile format does not specify which holes belong to which exteriors + # so have to do efficient multi-stage checking of hole-to-exterior containment + elif len(exteriors) > 1: + # exit early if no holes + if not holes: + polys = [] + for ext in exteriors: + poly = [ext] + polys.append(poly) + return polys + + # first determine each hole's candidate exteriors based on simple bbox contains test + hole_exteriors = dict([(hole_i,[]) for hole_i in xrange(len(holes))]) + exterior_bboxes = [ring_bbox(ring) for ring in exteriors] + for hole_i in hole_exteriors.keys(): + hole_bbox = ring_bbox(holes[hole_i]) + for ext_i,ext_bbox in enumerate(exterior_bboxes): + if bbox_contains(ext_bbox, hole_bbox): + hole_exteriors[hole_i].append( ext_i ) + + # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test + for hole_i,exterior_candidates in hole_exteriors.items(): + + if len(exterior_candidates) > 1: + # get hole sample point + ccw = not is_cw(holes[hole_i]) + hole_sample = ring_sample(holes[hole_i], ccw=ccw) + # collect new exterior candidates + new_exterior_candidates = [] + for ext_i in exterior_candidates: + # check that hole sample point is inside exterior + hole_in_exterior = ring_contains_point(exteriors[ext_i], hole_sample) + if hole_in_exterior: + new_exterior_candidates.append(ext_i) + + # set new exterior candidates + hole_exteriors[hole_i] = new_exterior_candidates + + # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole + for hole_i,exterior_candidates in hole_exteriors.items(): + + if len(exterior_candidates) > 1: + # exterior candidate with the smallest area is the hole's most immediate parent + ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x], fast=True)))[0] + hole_exteriors[hole_i] = [ext_i] + + # separate out holes that are orphaned (not contained by any exterior) + orphan_holes = [] + for hole_i,exterior_candidates in list(hole_exteriors.items()): + if not exterior_candidates: + orphan_holes.append( hole_i ) + del hole_exteriors[hole_i] + continue + + # each hole should now only belong to one exterior, group into exterior-holes polygons + polys = [] + for ext_i,ext in enumerate(exteriors): + poly = [ext] + # find relevant holes + poly_holes = [] + for hole_i,exterior_candidates in list(hole_exteriors.items()): + # hole is relevant if previously matched with this exterior + if exterior_candidates[0] == ext_i: + poly_holes.append( holes[hole_i] ) + poly += poly_holes + polys.append(poly) + + # add orphan holes as exteriors + for hole_i in orphan_holes: + ext = holes[hole_i] + # add as single exterior without any holes + poly = [ext] + polys.append(poly) + + if orphan_holes and return_errors is not None: + return_errors['polygon_orphaned_holes'] = len(orphan_holes) + + return polys + + # no exteriors, be nice and assume due to incorrect 
winding order + else: + if return_errors is not None: + return_errors['polygon_only_holes'] = len(holes) + exteriors = holes + # add as single exterior without any holes + polys = [[ext] for ext in exteriors] + return polys + +class Shape(object): + def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None): + """Stores the geometry of the different shape types + specified in the Shapefile spec. Shape types are + usually point, polyline, or polygons. Every shape type + except the "Null" type contains points at some level for + example vertices in a polygon. If a shape type has + multiple shapes containing points within a single + geometry record then those shapes are called parts. Parts + are designated by their starting index in geometry record's + list of shapes. For MultiPatch geometry, partTypes designates + the patch type of each of the parts. + """ + self.shapeType = shapeType + self.points = points or [] + self.parts = parts or [] + if partTypes: + self.partTypes = partTypes + + # and a dict to silently record any errors encountered + self._errors = {} + + # add oid + if oid is not None: + self.__oid = oid + else: + self.__oid = -1 + + @property + def __geo_interface__(self): + if self.shapeType in [POINT, POINTM, POINTZ]: + # point + if len(self.points) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {'type':'Point', 'coordinates':tuple()} + else: + return { + 'type': 'Point', + 'coordinates': tuple(self.points[0]) + } + elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: + if len(self.points) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {'type':'MultiPoint', 'coordinates':[]} + else: + # multipoint + return { + 'type': 'MultiPoint', + 'coordinates': [tuple(p) for p in self.points] + } + elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {'type':'LineString', 'coordinates':[]} + elif len(self.parts) == 1: + # linestring + return { + 'type': 'LineString', + 'coordinates': [tuple(p) for p in self.points] + } + else: + # multilinestring + ps = None + coordinates = [] + for part in self.parts: + if ps == None: + ps = part + continue + else: + coordinates.append([tuple(p) for p in self.points[ps:part]]) + ps = part + else: + coordinates.append([tuple(p) for p in self.points[part:]]) + return { + 'type': 'MultiLineString', + 'coordinates': coordinates + } + elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: + if len(self.parts) == 0: + # the shape has no coordinate information, i.e. 
is 'empty' + # the geojson spec does not define a proper null-geometry type + # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries + return {'type':'Polygon', 'coordinates':[]} + else: + # get all polygon rings + rings = [] + for i in xrange(len(self.parts)): + # get indexes of start and end points of the ring + start = self.parts[i] + try: + end = self.parts[i+1] + except IndexError: + end = len(self.points) + + # extract the points that make up the ring + ring = [tuple(p) for p in self.points[start:end]] + rings.append(ring) + + # organize rings into list of polygons, where each polygon is defined as list of rings. + # the first ring is the exterior and any remaining rings are holes (same as GeoJSON). + polys = organize_polygon_rings(rings, self._errors) + + # if VERBOSE is True, issue detailed warning about any shape errors + # encountered during the Shapefile to GeoJSON conversion + if VERBOSE and self._errors: + header = 'Possible issue encountered when converting Shape #{} to GeoJSON: '.format(self.oid) + orphans = self._errors.get('polygon_orphaned_holes', None) + if orphans: + msg = header + 'Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ +but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ +orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ +encoded as GeoJSON exterior rings instead of holes.' + logger.warning(msg) + only_holes = self._errors.get('polygon_only_holes', None) + if only_holes: + msg = header + 'Shapefile format requires that polygons contain at least one exterior ring, \ +but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ +still included but were encoded as GeoJSON exterior rings instead of holes.' + logger.warning(msg) + + # return as geojson + if len(polys) == 1: + return { + 'type': 'Polygon', + 'coordinates': polys[0] + } + else: + return { + 'type': 'MultiPolygon', + 'coordinates': polys + } + + else: + raise Exception('Shape type "%s" cannot be represented as GeoJSON.' % SHAPETYPE_LOOKUP[self.shapeType]) + + @staticmethod + def _from_geojson(geoj): + # create empty shape + shape = Shape() + # set shapeType + geojType = geoj["type"] if geoj else "Null" + if geojType == "Null": + shapeType = NULL + elif geojType == "Point": + shapeType = POINT + elif geojType == "LineString": + shapeType = POLYLINE + elif geojType == "Polygon": + shapeType = POLYGON + elif geojType == "MultiPoint": + shapeType = MULTIPOINT + elif geojType == "MultiLineString": + shapeType = POLYLINE + elif geojType == "MultiPolygon": + shapeType = POLYGON + else: + raise Exception("Cannot create Shape from GeoJSON type '%s'" % geojType) + shape.shapeType = shapeType + + # set points and parts + if geojType == "Point": + shape.points = [ geoj["coordinates"] ] + shape.parts = [0] + elif geojType in ("MultiPoint","LineString"): + shape.points = geoj["coordinates"] + shape.parts = [0] + elif geojType in ("Polygon"): + points = [] + parts = [] + index = 0 + for i,ext_or_hole in enumerate(geoj["coordinates"]): + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. 
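+                # shapefiles use the opposite convention (clockwise exteriors,
+                # counter-clockwise holes), so each ring is rewound as needed below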
+ if i == 0 and not is_cw(ext_or_hole): + # flip exterior direction + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): + # flip hole direction + ext_or_hole = rewind(ext_or_hole) + points.extend(ext_or_hole) + parts.append(index) + index += len(ext_or_hole) + shape.points = points + shape.parts = parts + elif geojType in ("MultiLineString"): + points = [] + parts = [] + index = 0 + for linestring in geoj["coordinates"]: + points.extend(linestring) + parts.append(index) + index += len(linestring) + shape.points = points + shape.parts = parts + elif geojType in ("MultiPolygon"): + points = [] + parts = [] + index = 0 + for polygon in geoj["coordinates"]: + for i,ext_or_hole in enumerate(polygon): + # although the latest GeoJSON spec states that exterior rings should have + # counter-clockwise orientation, we explicitly check orientation since older + # GeoJSONs might not enforce this. + if i == 0 and not is_cw(ext_or_hole): + # flip exterior direction + ext_or_hole = rewind(ext_or_hole) + elif i > 0 and is_cw(ext_or_hole): + # flip hole direction + ext_or_hole = rewind(ext_or_hole) + points.extend(ext_or_hole) + parts.append(index) + index += len(ext_or_hole) + shape.points = points + shape.parts = parts + return shape + + @property + def oid(self): + """The index position of the shape in the original shapefile""" + return self.__oid + + @property + def shapeTypeName(self): + return SHAPETYPE_LOOKUP[self.shapeType] + + def __repr__(self): + return 'Shape #{}: {}'.format(self.__oid, self.shapeTypeName) + +class _Record(list): + """ + A class to hold a record. Subclasses list to ensure compatibility with + former work and to reuse all the optimizations of the builtin list. + In addition to the list interface, the values of the record + can also be retrieved using the field's name. For example if the dbf contains + a field ID at position 0, the ID can be retrieved with the position, the field name + as a key, or the field name as an attribute. + + >>> # Create a Record with one field, normally the record is created by the Reader class + >>> r = _Record({'ID': 0}, [0]) + >>> print(r[0]) + >>> print(r['ID']) + >>> print(r.ID) + """ + + def __init__(self, field_positions, values, oid=None): + """ + A Record should be created by the Reader class + + :param field_positions: A dict mapping field names to field positions + :param values: A sequence of values + :param oid: The object id, an int (optional) + """ + self.__field_positions = field_positions + if oid is not None: + self.__oid = oid + else: + self.__oid = -1 + list.__init__(self, values) + + def __getattr__(self, item): + """ + __getattr__ is called if an attribute is used that does + not exist in the normal sense. 
For example r=Record(...), r.ID + calls r.__getattr__('ID'), but r.index(5) calls list.index(r, 5) + :param item: The field name, used as attribute + :return: Value of the field + :raises: AttributeError, if item is not a field of the shapefile + and IndexError, if the field exists but the field's + corresponding value in the Record does not exist + """ + try: + if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() + raise AttributeError('_Record does not implement __setstate__') + index = self.__field_positions[item] + return list.__getitem__(self, index) + except KeyError: + raise AttributeError('{} is not a field name'.format(item)) + except IndexError: + raise IndexError('{} found as a field but not enough values available.'.format(item)) + + def __setattr__(self, key, value): + """ + Sets a value of a field attribute + :param key: The field name + :param value: the value of that field + :return: None + :raises: AttributeError, if key is not a field of the shapefile + """ + if key.startswith('_'): # Prevent infinite loop when setting mangled attribute + return list.__setattr__(self, key, value) + try: + index = self.__field_positions[key] + return list.__setitem__(self, index, value) + except KeyError: + raise AttributeError('{} is not a field name'.format(key)) + + def __getitem__(self, item): + """ + Extends the normal list item access with + access using a fieldname + + For example r['ID'], r[0] + :param item: Either the position of the value or the name of a field + :return: the value of the field + """ + try: + return list.__getitem__(self, item) + except TypeError: + try: + index = self.__field_positions[item] + except KeyError: + index = None + if index is not None: + return list.__getitem__(self, index) + else: + raise IndexError('"{}" is not a field name and not an int'.format(item)) + + def __setitem__(self, key, value): + """ + Extends the normal list item access with + access using a fieldname + + For example r['ID']=2, r[0]=2 + :param key: Either the position of the value or the name of a field + :param value: the new value of the field + """ + try: + return list.__setitem__(self, key, value) + except TypeError: + index = self.__field_positions.get(key) + if index is not None: + return list.__setitem__(self, index, value) + else: + raise IndexError('{} is not a field name and not an int'.format(key)) + + @property + def oid(self): + """The index position of the record in the original shapefile""" + return self.__oid + + def as_dict(self, date_strings=False): + """ + Returns this Record as a dictionary using the field names as keys + :return: dict + """ + dct = dict((f, self[i]) for f, i in self.__field_positions.items()) + if date_strings: + for k,v in dct.items(): + if isinstance(v, date): + dct[k] = '{:04d}{:02d}{:02d}'.format(v.year, v.month, v.day) + return dct + + def __repr__(self): + return 'Record #{}: {}'.format(self.__oid, list(self)) + + def __dir__(self): + """ + Helps to show the field names in an interactive environment like IPython. + See: http://ipython.readthedocs.io/en/stable/config/integrating.html + + :return: List of method names and fields + """ + default = list(dir(type(self))) # default list methods and attributes of this class + fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) + return default + fnames + +class ShapeRecord(object): + """A ShapeRecord object containing a shape along with its attributes. 
+ Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" + def __init__(self, shape=None, record=None): + self.shape = shape + self.record = record + + @property + def __geo_interface__(self): + return {'type': 'Feature', + 'properties': self.record.as_dict(date_strings=True), + 'geometry': None if self.shape.shapeType == NULL else self.shape.__geo_interface__} + +class Shapes(list): + """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with + former work and to reuse all the optimizations of the builtin list. + In addition to the list interface, this also provides the GeoJSON __geo_interface__ + to return a GeometryCollection dictionary.""" + + def __repr__(self): + return 'Shapes: {}'.format(list(self)) + + @property + def __geo_interface__(self): + # Note: currently this will fail if any of the shapes are null-geometries + # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords + collection = {'type': 'GeometryCollection', + 'geometries': [shape.__geo_interface__ for shape in self]} + return collection + +class ShapeRecords(list): + """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with + former work and to reuse all the optimizations of the builtin list. + In addition to the list interface, this also provides the GeoJSON __geo_interface__ + to return a FeatureCollection dictionary.""" + + def __repr__(self): + return 'ShapeRecords: {}'.format(list(self)) + + @property + def __geo_interface__(self): + collection = {'type': 'FeatureCollection', + 'features': [shaperec.__geo_interface__ for shaperec in self]} + return collection + +class ShapefileException(Exception): + """An exception to handle shapefile specific problems.""" + pass + +class Reader(object): + """Reads the three files of a shapefile as a unit or + separately. If one of the three files (.shp, .shx, + .dbf) is missing no exception is thrown until you try + to call a method that depends on that particular file. + The .shx index file is used if available for efficiency + but is not required to read the geometry from the .shp + file. The "shapefile" argument in the constructor is the + name of the file you want to open, and can be the path + to a shapefile on a local filesystem, inside a zipfile, + or a url. + + You can instantiate a Reader without specifying a shapefile + and then specify one later with the load() method. + + Only the shapefile headers are read upon loading. Content + within each file is only accessed when required and as + efficiently as possible. Shapefiles are usually not large + but they can be. 
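+
+    A typical usage sketch (the path is illustrative only, hence the skipped
+    doctest):
+
+    >>> with Reader("shapefiles/blockgroups") as sf:  # doctest: +SKIP
+    ...     shapes = sf.shapes()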
+ """ + def __init__(self, *args, **kwargs): + self.shp = None + self.shx = None + self.dbf = None + self._files_to_close = [] + self.shapeName = "Not specified" + self._offsets = [] + self.shpLength = None + self.numRecords = None + self.numShapes = None + self.fields = [] + self.__dbfHdrLength = 0 + self.__fieldLookup = {} + self.encoding = kwargs.pop('encoding', 'utf-8') + self.encodingErrors = kwargs.pop('encodingErrors', 'strict') + # See if a shapefile name was passed as the first argument + if len(args) > 0: + path = pathlike_obj(args[0]) + if is_string(path): + + if '.zip' in path: + # Shapefile is inside a zipfile + if path.count('.zip') > 1: + # Multiple nested zipfiles + raise ShapefileException('Reading from multiple nested zipfiles is not supported: %s' % path) + # Split into zipfile and shapefile paths + if path.endswith('.zip'): + zpath = path + shapefile = None + else: + zpath = path[:path.find('.zip')+4] + shapefile = path[path.find('.zip')+4+1:] + # Create a zip file handle + if zpath.startswith('http'): + # Zipfile is from a url + # Download to a temporary url and treat as normal zipfile + req = Request(zpath, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) + resp = urlopen(req) + # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected + zipfileobj = tempfile.NamedTemporaryFile(mode='w+b', suffix='.zip', delete=True) + zipfileobj.write(resp.read()) + zipfileobj.seek(0) + else: + # Zipfile is from a file + zipfileobj = open(zpath, mode='rb') + # Open the zipfile archive + with zipfile.ZipFile(zipfileobj, 'r') as archive: + if not shapefile: + # Only the zipfile path is given + # Inspect zipfile contents to find the full shapefile path + shapefiles = [name + for name in archive.namelist() + if (name.endswith('.SHP') or name.endswith('.shp'))] + # The zipfile must contain exactly one shapefile + if len(shapefiles) == 0: + raise ShapefileException('Zipfile does not contain any shapefiles') + elif len(shapefiles) == 1: + shapefile = shapefiles[0] + else: + raise ShapefileException('Zipfile contains more than one shapefile: %s. Please specify the full \ + path to the shapefile you would like to open.' % shapefiles ) + # Try to extract file-like objects from zipfile + shapefile = os.path.splitext(shapefile)[0] # root shapefile name + for ext in ['SHP','SHX','DBF','shp','shx','dbf']: + try: + member = archive.open(shapefile+'.'+ext) + # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) + fileobj.write(member.read()) + fileobj.seek(0) + setattr(self, ext.lower(), fileobj) + self._files_to_close.append(fileobj) + except: + pass + # Close and delete the temporary zipfile + try: zipfileobj.close() + except: pass + # Try to load shapefile + if (self.shp or self.dbf): + # Load and exit early + self.load() + return + else: + raise ShapefileException("No shp or dbf file found in zipfile: %s" % path) + + elif path.startswith('http'): + # Shapefile is from a url + # Download each file to temporary path and treat as normal shapefile path + urlinfo = urlparse(path) + urlpath = urlinfo[2] + urlpath,_ = os.path.splitext(urlpath) + shapefile = os.path.basename(urlpath) + for ext in ['shp','shx','dbf']: + try: + _urlinfo = list(urlinfo) + _urlinfo[2] = urlpath + '.' 
+ ext + _path = urlunparse(_urlinfo) + req = Request(_path, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) + resp = urlopen(req) + # write url data to a read+write tempfile and use as source, gets deleted on close() + fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) + fileobj.write(resp.read()) + fileobj.seek(0) + setattr(self, ext, fileobj) + self._files_to_close.append(fileobj) + except HTTPError: + pass + if (self.shp or self.dbf): + # Load and exit early + self.load() + return + else: + raise ShapefileException("No shp or dbf file found at url: %s" % path) + + else: + # Local file path to a shapefile + # Load and exit early + self.load(path) + return + + # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) + if "shp" in kwargs.keys(): + if hasattr(kwargs["shp"], "read"): + self.shp = kwargs["shp"] + # Copy if required + try: + self.shp.seek(0) + except (NameError, io.UnsupportedOperation): + self.shp = io.BytesIO(self.shp.read()) + else: + (baseName, ext) = os.path.splitext(kwargs["shp"]) + self.load_shp(baseName) + + if "shx" in kwargs.keys(): + if hasattr(kwargs["shx"], "read"): + self.shx = kwargs["shx"] + # Copy if required + try: + self.shx.seek(0) + except (NameError, io.UnsupportedOperation): + self.shx = io.BytesIO(self.shx.read()) + else: + (baseName, ext) = os.path.splitext(kwargs["shx"]) + self.load_shx(baseName) + + if "dbf" in kwargs.keys(): + if hasattr(kwargs["dbf"], "read"): + self.dbf = kwargs["dbf"] + # Copy if required + try: + self.dbf.seek(0) + except (NameError, io.UnsupportedOperation): + self.dbf = io.BytesIO(self.dbf.read()) + else: + (baseName, ext) = os.path.splitext(kwargs["dbf"]) + self.load_dbf(baseName) + + # Load the files + if self.shp or self.dbf: + self.load() + + def __str__(self): + """ + Use some general info on the shapefile as __str__ + """ + info = ['shapefile Reader'] + if self.shp: + info.append(" {} shapes (type '{}')".format( + len(self), SHAPETYPE_LOOKUP[self.shapeType])) + if self.dbf: + info.append(' {} records ({} fields)'.format( + len(self), len(self.fields))) + return '\n'.join(info) + + def __enter__(self): + """ + Enter phase of context manager. + """ + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ + Exit phase of context manager, close opened files. + """ + self.close() + + def __len__(self): + """Returns the number of shapes/records in the shapefile.""" + if self.dbf: + # Preferably use dbf record count + if self.numRecords is None: + self.__dbfHeader() + + return self.numRecords + + elif self.shp: + # Otherwise use shape count + if self.shx: + if self.numShapes is None: + self.__shxHeader() + + return self.numShapes + + else: + # Index file not available, iterate all shapes to get total count + if self.numShapes is None: + # Determine length of shp file + shp = self.shp + checkpoint = shp.tell() + shp.seek(0,2) + shpLength = shp.tell() + shp.seek(100) + # Do a fast shape iteration until end of file. 
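+                # Each shape record is an 8-byte big-endian header (record
+                # number and content length, the latter counted in 16-bit
+                # words) followed by the content itself, hence the jumps of
+                # 8 + 2*recLength bytes below.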
+ unpack = Struct('>2i').unpack + offsets = [] + pos = shp.tell() + while pos < shpLength: + offsets.append(pos) + # Unpack the shape header only + (recNum, recLength) = unpack(shp.read(8)) + # Jump to next shape position + pos += 8 + (2 * recLength) + shp.seek(pos) + # Set numShapes and offset indices + self.numShapes = len(offsets) + self._offsets = offsets + # Return to previous file position + shp.seek(checkpoint) + + return self.numShapes + + else: + # No file loaded yet, treat as 'empty' shapefile + return 0 + + def __iter__(self): + """Iterates through the shapes/records in the shapefile.""" + for shaperec in self.iterShapeRecords(): + yield shaperec + + @property + def __geo_interface__(self): + shaperecords = self.shapeRecords() + fcollection = shaperecords.__geo_interface__ + fcollection['bbox'] = list(self.bbox) + return fcollection + + @property + def shapeTypeName(self): + return SHAPETYPE_LOOKUP[self.shapeType] + + def load(self, shapefile=None): + """Opens a shapefile from a filename or file-like + object. Normally this method would be called by the + constructor with the file name as an argument.""" + if shapefile: + (shapeName, ext) = os.path.splitext(shapefile) + self.shapeName = shapeName + self.load_shp(shapeName) + self.load_shx(shapeName) + self.load_dbf(shapeName) + if not (self.shp or self.dbf): + raise ShapefileException("Unable to open %s.dbf or %s.shp." % (shapeName, shapeName)) + if self.shp: + self.__shpHeader() + if self.dbf: + self.__dbfHeader() + if self.shx: + self.__shxHeader() + + def load_shp(self, shapefile_name): + """ + Attempts to load file with .shp extension as both lower and upper case + """ + shp_ext = 'shp' + try: + self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") + self._files_to_close.append(self.shp) + except IOError: + try: + self.shp = open("%s.%s" % (shapefile_name, shp_ext.upper()), "rb") + self._files_to_close.append(self.shp) + except IOError: + pass + + def load_shx(self, shapefile_name): + """ + Attempts to load file with .shx extension as both lower and upper case + """ + shx_ext = 'shx' + try: + self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") + self._files_to_close.append(self.shx) + except IOError: + try: + self.shx = open("%s.%s" % (shapefile_name, shx_ext.upper()), "rb") + self._files_to_close.append(self.shx) + except IOError: + pass + + def load_dbf(self, shapefile_name): + """ + Attempts to load file with .dbf extension as both lower and upper case + """ + dbf_ext = 'dbf' + try: + self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") + self._files_to_close.append(self.dbf) + except IOError: + try: + self.dbf = open("%s.%s" % (shapefile_name, dbf_ext.upper()), "rb") + self._files_to_close.append(self.dbf) + except IOError: + pass + + def __del__(self): + self.close() + + def close(self): + # Close any files that the reader opened (but not those given by user) + for attribute in self._files_to_close: + if hasattr(attribute, 'close'): + try: + attribute.close() + except IOError: + pass + self._files_to_close = [] + + def __getFileObj(self, f): + """Checks to see if the requested shapefile file object is + available. 
If not a ShapefileException is raised."""
+        if not f:
+            raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.")
+        if self.shp and self.shpLength is None:
+            self.load()
+        if self.dbf and len(self.fields) == 0:
+            self.load()
+        return f
+
+    def __restrictIndex(self, i):
+        """Provides list-like handling of a record index with a clearer
+        error message if the index is out of bounds."""
+        if self.numRecords:
+            rmax = self.numRecords - 1
+            if abs(i) > rmax:
+                raise IndexError("Shape or Record index out of range.")
+            if i < 0: i = range(self.numRecords)[i]
+        return i
+
+    def __shpHeader(self):
+        """Reads the header information from a .shp file."""
+        if not self.shp:
+            raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shp file found")
+        shp = self.shp
+        # File length (16-bit word * 2 = bytes)
+        shp.seek(24)
+        self.shpLength = unpack(">i", shp.read(4))[0] * 2
+        # Shape type
+        shp.seek(32)
+        self.shapeType= unpack("<i", shp.read(4))[0]
+        # The shapefile's bounding box (lower left, upper right)
+        self.bbox = _Array('d', unpack("<4d", shp.read(32)))
+        # Elevation
+        self.zbox = _Array('d', unpack("<2d", shp.read(16)))
+        # Measure
+        self.mbox = []
+        for m in _Array('d', unpack("<2d", shp.read(16))):
+            # Measure values less than -10e38 are nodata values according to the spec
+            if m > NODATA:
+                self.mbox.append(m)
+            else:
+                self.mbox.append(None)
+
+    def __shape(self, oid=None, bbox=None):
+        """Returns the header info and geometry for a single shape."""
+        f = self.__getFileObj(self.shp)
+        record = Shape(oid=oid)
+        nParts = nPoints = zmin = zmax = mmin = mmax = None
+        (recNum, recLength) = unpack(">2i", f.read(8))
+        # Determine the start of the next record
+        next = f.tell() + (2 * recLength)
+        shapeType = unpack("<i", f.read(4))[0]
+        record.shapeType = shapeType
+        # For Null shapes create an empty points list for consistency
+        if shapeType == 0:
+            record.points = []
+        # All shape types capable of having a bounding box
+        elif shapeType in (3,5,8,13,15,18,23,25,28,31):
+            record.bbox = _Array('d', unpack("<4d", f.read(32)))
+            # if bbox specified and no overlap, skip this shape
+            if bbox is not None and not bbox_overlap(bbox, record.bbox):
+                # because we stop parsing this shape, skip to beginning of
+                # next shape before we return
+                f.seek(next)
+                return None
+        # Shape types with parts
+        if shapeType in (3,5,13,15,23,25,31):
+            nParts = unpack("<i", f.read(4))[0]
+        # Shape types with points
+        if shapeType in (3,5,8,13,15,18,23,25,28,31):
+            nPoints = unpack("<i", f.read(4))[0]
+        # Read parts
+        if nParts:
+            record.parts = _Array('i', unpack("<%si" % nParts, f.read(nParts * 4)))
+        # Read part types for Multipatch - 31
+        if shapeType == 31:
+            record.partTypes = _Array('i', unpack("<%si" % nParts, f.read(nParts * 4)))
+        # Read points - produces a list of [x,y] values
+        if nPoints:
+            flat = unpack("<%sd" % (2 * nPoints), f.read(16*nPoints))
+            record.points = list(izip(*(iter(flat),) * 2))
+        # Read z extremes and values
+        if shapeType in (13,15,18,31):
+            (zmin, zmax) = unpack("<2d", f.read(16))
+            record.z = _Array('d', unpack("<%sd" % nPoints, f.read(nPoints * 8)))
+        # Read m extremes and values
+        if shapeType in (13,15,18,23,25,28,31):
+            if next - f.tell() >= 16:
+                (mmin, mmax) = unpack("<2d", f.read(16))
+            # Measure values less than -10e38 are nodata values according to the spec
+            if next - f.tell() >= nPoints * 8:
+                record.m = []
+                for m in _Array('d', unpack("<%sd" % nPoints, f.read(nPoints * 8))):
+                    if m > NODATA:
+                        record.m.append(m)
+                    else:
+                        record.m.append(None)
+            else:
+                record.m = [None for _ in range(nPoints)]
+        # Read a single point
+        if shapeType in (1,11,21):
+            record.points = [_Array('d', unpack("<2d", f.read(16)))]
+            if bbox is not None:
+                # create bounding box for Point by duplicating coordinates
+                point_bbox = list(record.points[0] + record.points[0])
+                # skip shape if no overlap with bounding box
+                if not bbox_overlap(bbox, point_bbox):
+                    f.seek(next)
+                    return None
+        # Read a single Z value
+        if shapeType == 11:
+            record.z = list(unpack("<d", f.read(8)))
+        # Read a single M value
+        if shapeType in (21,11):
+            if next - f.tell() >= 8:
+                (m,) = unpack("<d", f.read(8))
+            else:
+                m = NODATA
+            # Measure values less than -10e38 are nodata values according to the spec
+            if m > NODATA:
+                record.m = [m]
+            else:
+                record.m = [None]
+        # Seek to the end of this record as defined by the record header because
+        # the shapefile spec doesn't require the actual content to meet the header
+        # definition. Probably allowed for lazy feature deletion.
+        f.seek(next)
+        return record
+
+    def __shxHeader(self):
+        """Reads the header information from a .shx file."""
+        shx = self.shx
+        if not shx:
+            raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found")
+        # File length (16-bit word * 2 = bytes) - header length
+        shx.seek(24)
+        shxRecordLength = (unpack(">i", shx.read(4))[0] * 2) - 100
+        self.numShapes = shxRecordLength // 8
+
+    def __shxOffsets(self):
+        '''Reads the shape offset positions from a .shx file'''
+        shx = self.shx
+        if not shx:
+            raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found")
+        # Jump to the first record.
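+        # After its fixed 100-byte header, the .shx file holds one 8-byte record
+        # per shape: a big-endian offset and content length, both counted in
+        # 16-bit words, which is why the offsets are doubled below to get byte
+        # positions.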
+ shx.seek(100) + # Each index record consists of two nrs, we only want the first one + shxRecords = _Array('i', shx.read(2 * self.numShapes * 4) ) + if sys.byteorder != 'big': + shxRecords.byteswap() + self._offsets = [2 * el for el in shxRecords[::2]] + + def __shapeIndex(self, i=None): + """Returns the offset in a .shp file for a shape based on information + in the .shx index file.""" + shx = self.shx + # Return None if no shx or no index requested + if not shx or i == None: + return None + # At this point, we know the shx file exists + if not self._offsets: + self.__shxOffsets() + return self._offsets[i] + + def shape(self, i=0, bbox=None): + """Returns a shape object for a shape in the geometry + record file. + If the 'bbox' arg is given (list or tuple of xmin,ymin,xmax,ymax), + returns None if the shape is not within that region. + """ + shp = self.__getFileObj(self.shp) + i = self.__restrictIndex(i) + offset = self.__shapeIndex(i) + if not offset: + # Shx index not available. + # Determine length of shp file + shp.seek(0,2) + shpLength = shp.tell() + shp.seek(100) + # Do a fast shape iteration until the requested index or end of file. + unpack = Struct('>2i').unpack + _i = 0 + offset = shp.tell() + while offset < shpLength: + if _i == i: + # Reached the requested index, exit loop with the offset value + break + # Unpack the shape header only + (recNum, recLength) = unpack(shp.read(8)) + # Jump to next shape position + offset += 8 + (2 * recLength) + shp.seek(offset) + _i += 1 + # If the index was not found, it likely means the .shp file is incomplete + if _i != i: + raise ShapefileException('Shape index {} is out of bounds; the .shp file only contains {} shapes'.format(i, _i)) + + # Seek to the offset and read the shape + shp.seek(offset) + return self.__shape(oid=i, bbox=bbox) + + def shapes(self, bbox=None): + """Returns all shapes in a shapefile. + To only read shapes within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. + """ + shapes = Shapes() + shapes.extend(self.iterShapes(bbox=bbox)) + return shapes + + def iterShapes(self, bbox=None): + """Returns a generator of shapes in a shapefile. Useful + for handling large shapefiles. + To only read shapes within a given spatial region, specify the 'bbox' + arg as a list or tuple of xmin,ymin,xmax,ymax. + """ + shp = self.__getFileObj(self.shp) + # Found shapefiles which report incorrect + # shp file length in the header. Can't trust + # that so we seek to the end of the file + # and figure it out. + shp.seek(0,2) + shpLength = shp.tell() + shp.seek(100) + + if self.numShapes: + # Iterate exactly the number of shapes from shx header + for i in xrange(self.numShapes): + # MAYBE: check if more left of file or exit early? + shape = self.__shape(oid=i, bbox=bbox) + if shape: + yield shape + else: + # No shx file, unknown nr of shapes + # Instead iterate until reach end of file + # Collect the offset indices during iteration + i = 0 + offsets = [] + pos = shp.tell() + while pos < shpLength: + offsets.append(pos) + shape = self.__shape(oid=i, bbox=bbox) + pos = shp.tell() + if shape: + yield shape + i += 1 + # Entire shp file consumed + # Update the number of shapes and list of offsets + assert i == len(offsets) + self.numShapes = i + self._offsets = offsets + + def __dbfHeader(self): + """Reads a dbf header. 
Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" + if not self.dbf: + raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no dbf file found)") + dbf = self.dbf + # read relevant header parts + dbf.seek(0) + self.numRecords, self.__dbfHdrLength, self.__recordLength = \ + unpack(" 0: + px, py = list(zip(*s.points))[:2] + x.extend(px) + y.extend(py) + else: + # this should not happen. + # any shape that is not null should have at least one point, and only those should be sent here. + # could also mean that earlier code failed to add points to a non-null shape. + raise Exception("Cannot create bbox. Expected a valid shape with at least one point. Got a shape of type '%s' and 0 points." % s.shapeType) + bbox = [min(x), min(y), max(x), max(y)] + # update global + if self._bbox: + # compare with existing + self._bbox = [min(bbox[0],self._bbox[0]), min(bbox[1],self._bbox[1]), max(bbox[2],self._bbox[2]), max(bbox[3],self._bbox[3])] + else: + # first time bbox is being set + self._bbox = bbox + return bbox + + def __zbox(self, s): + z = [] + for p in s.points: + try: + z.append(p[2]) + except IndexError: + # point did not have z value + # setting it to 0 is probably ok, since it means all are on the same elevation + z.append(0) + zbox = [min(z), max(z)] + # update global + if self._zbox: + # compare with existing + self._zbox = [min(zbox[0],self._zbox[0]), max(zbox[1],self._zbox[1])] + else: + # first time zbox is being set + self._zbox = zbox + return zbox + + def __mbox(self, s): + mpos = 3 if s.shapeType in (11,13,15,18,31) else 2 + m = [] + for p in s.points: + try: + if p[mpos] is not None: + # mbox should only be calculated on valid m values + m.append(p[mpos]) + except IndexError: + # point did not have m value so is missing + # mbox should only be calculated on valid m values + pass + if not m: + # only if none of the shapes had m values, should mbox be set to missing m values + m.append(NODATA) + mbox = [min(m), max(m)] + # update global + if self._mbox: + # compare with existing + self._mbox = [min(mbox[0],self._mbox[0]), max(mbox[1],self._mbox[1])] + else: + # first time mbox is being set + self._mbox = mbox + return mbox + + @property + def shapeTypeName(self): + return SHAPETYPE_LOOKUP[self.shapeType] + + def bbox(self): + """Returns the current bounding box for the shapefile which is + the lower-left and upper-right corners. It does not contain the + elevation or measure extremes.""" + return self._bbox + + def zbox(self): + """Returns the current z extremes for the shapefile.""" + return self._zbox + + def mbox(self): + """Returns the current m extremes for the shapefile.""" + return self._mbox + + def __shapefileHeader(self, fileObj, headerType='shp'): + """Writes the specified header type to the specified file-like object. 
+        Several of the shapefile formats are so similar that a single generic
+        method to read or write them is warranted."""
+        f = self.__getFileObj(fileObj)
+        f.seek(0)
+        # File code, Unused bytes
+        f.write(pack(">6i", 9994,0,0,0,0,0))
+        # File length (Bytes / 2 = 16-bit words)
+        if headerType == 'shp':
+            f.write(pack(">i", self.__shpFileLength()))
+        elif headerType == 'shx':
+            f.write(pack('>i', ((100 + (self.shpNum * 8)) // 2)))
+        # Version, Shape type
+        if self.shapeType is None:
+            self.shapeType = NULL
+        f.write(pack("<2i", 1000, self.shapeType))
+        # The shapefile's bounding box (lower left, upper right)
+        if self.shapeType != 0:
+            try:
+                bbox = self.bbox()
+                if bbox is None:
+                    # The bbox is initialized with None, so this would mean the shapefile contains no valid geometries.
+                    # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'.
+                    # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions.
+                    # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles.
+                    bbox = [0,0,0,0]
+                f.write(pack("<4d", *bbox))
+            except error:
+                raise ShapefileException("Failed to write shapefile bounding box. Floats required.")
+        else:
+            f.write(pack("<4d", 0,0,0,0))
+        # Elevation
+        if self.shapeType in (11,13,15,18):
+            # Z values are present in Z type
+            zbox = self.zbox()
+            if zbox is None:
+                # means we have empty shapefile/only null geoms (see commentary on bbox above)
+                zbox = [0,0]
+        else:
+            # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s
+            zbox = [0,0]
+        # Measure
+        if self.shapeType in (11,13,15,18,21,23,25,28,31):
+            # M values are present in M or Z type
+            mbox = self.mbox()
+            if mbox is None:
+                # means we have empty shapefile/only null geoms (see commentary on bbox above)
+                mbox = [0,0]
+        else:
+            # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s
+            mbox = [0,0]
+        # Try writing
+        try:
+            f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1]))
+        except error:
+            raise ShapefileException("Failed to write shapefile elevation and measure values. Floats required.")
+
+    def __dbfHeader(self):
+        """Writes the dbf header and field descriptors."""
+        f = self.__getFileObj(self.dbf)
+        f.seek(0)
+        version = 3
+        year, month, day = time.localtime()[:3]
+        year -= 1900
+        # Get all fields, ignoring DeletionFlag if specified
+        fields = [field for field in self.fields if field[0] != 'DeletionFlag']
+        # Ensure has at least one field
+        if not fields:
+            raise ShapefileException("Shapefile dbf file must contain at least one field.")
+        numRecs = self.recNum
+        numFields = len(fields)
+        headerLength = numFields * 32 + 33
+        if headerLength >= 65535:
+            raise ShapefileException(
+                    "Shapefile dbf header length exceeds maximum length.")
+        recordLength = sum([int(field[2]) for field in fields]) + 1
+        header = pack('<BBBBLHH20x', version, year, month, day, numRecs,
+                headerLength, recordLength)
+        f.write(header)
+        # Field descriptors
+        for field in fields:
+            name, fieldType, size, decimal = field
+            name = b(name, self.encoding, self.encodingErrors)
+            name = name.replace(b' ', b'_')
+            name = name[:10].ljust(11).replace(b' ', b'\x00')
+            fieldType = b(fieldType, 'ascii')
+            size = int(size)
+            fld = pack('<11sc4xBB14x', name, fieldType, size, decimal)
+            f.write(fld)
+        # Terminator
+        f.write(b'\r')
+
+    def shape(self, s):
+        # Balance if already not balanced
+        if self.autoBalance and self.recNum < self.shpNum:
+            self.balance()
+        # Check is shape or import from geojson
+        if not isinstance(s, Shape):
+            if hasattr(s, "__geo_interface__"):
+                s = s.__geo_interface__
+            if isinstance(s, dict):
+                s = Shape._from_geojson(s)
+            else:
+                raise Exception("Can only write Shape objects, GeoJSON dictionaries, "
+                                "or objects with the __geo_interface__, "
+                                "not: %r" % s)
+        # Write to file
+        offset,length = self.__shpRecord(s)
+        if self.shx:
+            self.__shxRecord(offset, length)
+
+    def __shpRecord(self, s):
+        f = self.__getFileObj(self.shp)
+        offset = f.tell()
+        # Record number, Content length place holder
+        self.shpNum += 1
+        f.write(pack(">2i", self.shpNum, 0))
+        start = f.tell()
+        # Shape Type
+        if self.shapeType is None and s.shapeType != NULL:
+            self.shapeType = s.shapeType
+        if s.shapeType != NULL and s.shapeType != self.shapeType:
+            raise Exception("The shape's type (%s) must match the type of the shapefile (%s)." % (s.shapeType, self.shapeType))
+        f.write(pack("<i", s.shapeType))
+        # For point just update bbox of the whole shapefile
+        if s.shapeType in (1,11,21):
+            self.__bbox(s)
+        # All shape types capable of having a bounding box
+        elif s.shapeType in (3,5,8,13,15,18,23,25,28,31):
+            try:
+                f.write(pack("<4d", *self.__bbox(s)))
+            except error:
+                raise ShapefileException("Failed to write bounding box for record %s. Expected floats." % self.shpNum)
+        # Shape types with parts
+        if s.shapeType in (3,5,13,15,23,25,31):
+            # Number of parts
+            f.write(pack("<i", len(s.parts)))
+        # Shape types with multiple points per record
+        if s.shapeType in (3,5,8,13,15,18,23,25,28,31):
+            # Number of points
+            f.write(pack("<i", len(s.points)))
+        # Write part indexes
+        if s.shapeType in (3,5,13,15,23,25,31):
+            for p in s.parts:
+                f.write(pack("<i", p))
+        # Part types for Multipatch (31)
+        if s.shapeType == 31:
+            for pt in s.partTypes:
+                f.write(pack("<i", pt))
+        # Write points for multiple-point records
+        if s.shapeType in (3,5,8,13,15,18,23,25,28,31):
+            try:
+                [f.write(pack("<2d", *p[:2])) for p in s.points]
+            except error:
+                raise ShapefileException("Failed to write points for record %s. Expected floats." % self.shpNum)
+        # Write z extremes and values
+        # Note: missing z values are autoset to 0, but not sure if this is ideal.
+        if s.shapeType in (13,15,18,31):
+            try:
+                f.write(pack("<2d", *self.__zbox(s)))
+            except error:
+                raise ShapefileException("Failed to write elevation extremes for record %s. Expected floats." % self.shpNum)
+            try:
+                if hasattr(s,"z"):
+                    # if z values are stored in attribute
+                    f.write(pack("<%sd" % len(s.z), *s.z))
+                else:
+                    # if z values are stored as 3rd dimension
+                    [f.write(pack("<d", p[2] if len(p) > 2 else 0)) for p in s.points]
+            except error:
+                raise ShapefileException("Failed to write elevation values for record %s. Expected floats."
% self.shpNum) + # Write m extremes and values + # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA + # Note: missing m values are autoset to NODATA. + if s.shapeType in (13,15,18,23,25,28,31): + try: + f.write(pack("<2d", *self.__mbox(s))) + except error: + raise ShapefileException("Failed to write measure extremes for record %s. Expected floats" % self.shpNum) + try: + if hasattr(s,"m"): + # if m values are stored in attribute + f.write(pack("<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m])) + else: + # if m values are stored as 3rd/4th dimension + # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) + mpos = 3 if s.shapeType in (13,15,18,31) else 2 + [f.write(pack(" mpos and p[mpos] is not None else NODATA)) for p in s.points] + except error: + raise ShapefileException("Failed to write measure values for record %s. Expected floats" % self.shpNum) + # Write a single point + if s.shapeType in (1,11,21): + try: + f.write(pack("<2d", s.points[0][0], s.points[0][1])) + except error: + raise ShapefileException("Failed to write point for record %s. Expected floats." % self.shpNum) + # Write a single Z value + # Note: missing z values are autoset to 0, but not sure if this is ideal. + if s.shapeType == 11: + # update the global z box + self.__zbox(s) + # then write value + if hasattr(s, "z"): + # if z values are stored in attribute + try: + if not s.z: + s.z = (0,) + f.write(pack("i", length)) + f.seek(finish) + return offset,length + + def __shxRecord(self, offset, length): + """Writes the shx records.""" + f = self.__getFileObj(self.shx) + try: + f.write(pack(">i", offset // 2)) + except error: + raise ShapefileException('The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones.') + f.write(pack(">i", length)) + + def record(self, *recordList, **recordDict): + """Creates a dbf attribute record. You can submit either a sequence of + field values or keyword arguments of field names and values. Before + adding records you must add fields for the record values using the + field() method. If the record values exceed the number of fields the + extra ones won't be added. 
In the case of using keyword arguments to specify + field/value pairs only fields matching the already registered fields + will be added.""" + # Balance if already not balanced + if self.autoBalance and self.recNum > self.shpNum: + self.balance() + + fieldCount = sum((1 for field in self.fields if field[0] != 'DeletionFlag')) + if recordList: + record = list(recordList) + while len(record) < fieldCount: + record.append("") + elif recordDict: + record = [] + for field in self.fields: + if field[0] == 'DeletionFlag': + continue # ignore deletionflag field in case it was specified + if field[0] in recordDict: + val = recordDict[field[0]] + if val is None: + record.append("") + else: + record.append(val) + else: + record.append("") # need empty value for missing dict entries + else: + # Blank fields for empty record + record = ["" for _ in range(fieldCount)] + self.__dbfRecord(record) + + def __dbfRecord(self, record): + """Writes the dbf records.""" + f = self.__getFileObj(self.dbf) + if self.recNum == 0: + # first records, so all fields should be set + # allowing us to write the dbf header + # cannot change the fields after this point + self.__dbfHeader() + # first byte of the record is deletion flag, always disabled + f.write(b' ') + # begin + self.recNum += 1 + fields = (field for field in self.fields if field[0] != 'DeletionFlag') # ignore deletionflag field in case it was specified + for (fieldName, fieldType, size, deci), value in zip(fields, record): + # write + fieldType = fieldType.upper() + size = int(size) + if fieldType in ("N","F"): + # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. + if value in MISSING: + value = b"*"*size # QGIS NULL + elif not deci: + # force to int + try: + # first try to force directly to int. + # forcing a large int to float and back to int + # will lose information and result in wrong nr. + value = int(value) + except ValueError: + # forcing directly to int failed, so was probably a float. + value = int(float(value)) + value = format(value, "d")[:size].rjust(size) # caps the size if exceeds the field size + else: + value = float(value) + value = format(value, ".%sf"%deci)[:size].rjust(size) # caps the size if exceeds the field size + elif fieldType == "D": + # date: 8 bytes - date stored as a string in the format YYYYMMDD. + if isinstance(value, date): + value = '{:04d}{:02d}{:02d}'.format(value.year, value.month, value.day) + elif isinstance(value, list) and len(value) == 3: + value = '{:04d}{:02d}{:02d}'.format(*value) + elif value in MISSING: + value = b'0' * 8 # QGIS NULL for date type + elif is_string(value) and len(value) == 8: + pass # value is already a date string + else: + raise ShapefileException("Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value.") + elif fieldType == 'L': + # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. 
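+                # truthy values are written as b'T', falsy values as b'F';
+                # missing or unrecognized values fall back to the blank b' '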
+ if value in MISSING: + value = b' ' # missing is set to space + elif value in [True,1]: + value = b'T' + elif value in [False,0]: + value = b'F' + else: + value = b' ' # unknown is set to space + else: + # anything else is forced to string, truncated to the length of the field + value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) + if not isinstance(value, bytes): + # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) + value = b(value, 'ascii', self.encodingErrors) # should be default ascii encoding + if len(value) != size: + raise ShapefileException( + "Shapefile Writer unable to pack incorrect sized value" + " (size %d) into field '%s' (size %d)." % (len(value), fieldName, size)) + f.write(value) + + def balance(self): + """Adds corresponding empty attributes or null geometry records depending + on which type of record was created to make sure all three files + are in synch.""" + while self.recNum > self.shpNum: + self.null() + while self.recNum < self.shpNum: + self.record() + + + def null(self): + """Creates a null shape.""" + self.shape(Shape(NULL)) + + + def point(self, x, y): + """Creates a POINT shape.""" + shapeType = POINT + pointShape = Shape(shapeType) + pointShape.points.append([x, y]) + self.shape(pointShape) + + def pointm(self, x, y, m=None): + """Creates a POINTM shape. + If the m (measure) value is not set, it defaults to NoData.""" + shapeType = POINTM + pointShape = Shape(shapeType) + pointShape.points.append([x, y, m]) + self.shape(pointShape) + + def pointz(self, x, y, z=0, m=None): + """Creates a POINTZ shape. + If the z (elevation) value is not set, it defaults to 0. + If the m (measure) value is not set, it defaults to NoData.""" + shapeType = POINTZ + pointShape = Shape(shapeType) + pointShape.points.append([x, y, z, m]) + self.shape(pointShape) + + + def multipoint(self, points): + """Creates a MULTIPOINT shape. + Points is a list of xy values.""" + shapeType = MULTIPOINT + points = [points] # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=points, shapeType=shapeType) + + def multipointm(self, points): + """Creates a MULTIPOINTM shape. + Points is a list of xym values. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = MULTIPOINTM + points = [points] # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=points, shapeType=shapeType) + + def multipointz(self, points): + """Creates a MULTIPOINTZ shape. + Points is a list of xyzm values. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = MULTIPOINTZ + points = [points] # nest the points inside a list to be compatible with the generic shapeparts method + self._shapeparts(parts=points, shapeType=shapeType) + + + def line(self, lines): + """Creates a POLYLINE shape. + Lines is a collection of lines, each made up of a list of xy values.""" + shapeType = POLYLINE + self._shapeparts(parts=lines, shapeType=shapeType) + + def linem(self, lines): + """Creates a POLYLINEM shape. + Lines is a collection of lines, each made up of a list of xym values. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYLINEM + self._shapeparts(parts=lines, shapeType=shapeType) + + def linez(self, lines): + """Creates a POLYLINEZ shape. 
+ Lines is a collection of lines, each made up of a list of xyzm values. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYLINEZ + self._shapeparts(parts=lines, shapeType=shapeType) + + + def poly(self, polys): + """Creates a POLYGON shape. + Polys is a collection of polygons, each made up of a list of xy values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction.""" + shapeType = POLYGON + self._shapeparts(parts=polys, shapeType=shapeType) + + def polym(self, polys): + """Creates a POLYGONM shape. + Polys is a collection of polygons, each made up of a list of xym values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYGONM + self._shapeparts(parts=polys, shapeType=shapeType) + + def polyz(self, polys): + """Creates a POLYGONZ shape. + Polys is a collection of polygons, each made up of a list of xyzm values. + Note that for ordinary polygons the coordinates must run in a clockwise direction. + If some of the polygons are holes, these must run in a counterclockwise direction. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = POLYGONZ + self._shapeparts(parts=polys, shapeType=shapeType) + + + def multipatch(self, parts, partTypes): + """Creates a MULTIPATCH shape. + Parts is a collection of 3D surface patches, each made up of a list of xyzm values. + PartTypes is a list of types that define each of the surface patches. + The types can be any of the following module constants: TRIANGLE_STRIP, + TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. + If the z (elevation) value is not included, it defaults to 0. + If the m (measure) value is not included, it defaults to None (NoData).""" + shapeType = MULTIPATCH + polyShape = Shape(shapeType) + polyShape.parts = [] + polyShape.points = [] + for part in parts: + # set part index position + polyShape.parts.append(len(polyShape.points)) + # add points + for point in part: + # Ensure point is list + if not isinstance(point, list): + point = list(point) + polyShape.points.append(point) + polyShape.partTypes = partTypes + # write the shape + self.shape(polyShape) + + + def _shapeparts(self, parts, shapeType): + """Internal method for adding a shape that has multiple collections of points (parts): + lines, polygons, and multipoint shapes. 
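+        For polygon shape types, unclosed rings are automatically closed by
+        repeating the first point at the end of the ring.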
+        """
+        polyShape = Shape(shapeType)
+        polyShape.parts = []
+        polyShape.points = []
+        # Make sure polygon rings (parts) are closed
+        if shapeType in (5,15,25,31): # POLYGON, POLYGONZ, POLYGONM, MULTIPATCH
+            for part in parts:
+                if part[0] != part[-1]:
+                    part.append(part[0])
+        # Add points and part indexes
+        for part in parts:
+            # set part index position
+            polyShape.parts.append(len(polyShape.points))
+            # add points
+            for point in part:
+                # Ensure point is list
+                if not isinstance(point, list):
+                    point = list(point)
+                polyShape.points.append(point)
+        # write the shape
+        self.shape(polyShape)
+
+    def field(self, name, fieldType="C", size="50", decimal=0):
+        """Adds a dbf field descriptor to the shapefile."""
+        if fieldType == "D":
+            size = "8"
+            decimal = 0
+        elif fieldType == "L":
+            size = "1"
+            decimal = 0
+        if len(self.fields) >= 2046:
+            raise ShapefileException(
+                "Shapefile Writer reached maximum number of fields: 2046.")
+        self.fields.append((name, fieldType, size, decimal))
+
+
+# Begin Testing
+def test(**kwargs):
+    import doctest
+    doctest.NORMALIZE_WHITESPACE = 1
+    verbosity = kwargs.get('verbose', 0)
+    if verbosity == 0:
+        print('Running doctests...')
+
+    # ignore py2-3 unicode differences
+    import re
+    class Py23DocChecker(doctest.OutputChecker):
+        def check_output(self, want, got, optionflags):
+            if sys.version_info[0] == 2:
+                got = re.sub("u'(.*?)'", "'\\1'", got)
+                got = re.sub('u"(.*?)"', '"\\1"', got)
+            res = doctest.OutputChecker.check_output(self, want, got, optionflags)
+            return res
+        def summarize(self):
+            doctest.OutputChecker.summarize(True)
+
+    # run tests
+    runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity)
+    with open("README.md","rb") as fobj:
+        test = doctest.DocTestParser().get_doctest(string=fobj.read().decode("utf8").replace('\r\n','\n'), globs={}, name="README", filename="README.md", lineno=0)
+    failure_count, test_count = runner.run(test)
+
+    # print results
+    if verbosity:
+        runner.summarize(True)
+    else:
+        if failure_count == 0:
+            print('All tests passed successfully')
+        elif failure_count > 0:
+            runner.summarize(verbosity)
+
+    return failure_count
+
+if __name__ == "__main__":
+    """
+    Doctests are contained in the file 'README.md', and are tested using the built-in
+    testing libraries.
+    """
+    failure_count = test()
+    sys.exit(failure_count)

From 6fa168a5e92ec15f5ade2b3d37490502a27b4288 Mon Sep 17 00:00:00 2001
From: Kurt Schwehr
Date: Sun, 15 Sep 2024 12:51:48 -0700
Subject: [PATCH 044/115] Initial sorting of imports.

---
 shapefile.py      | 13 ++++++-------
 test_shapefile.py |  6 +++---
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/shapefile.py b/shapefile.py
index 7d018aef..cace43fc 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -8,21 +8,20 @@

 __version__ = "2.3.1"

-from struct import pack, unpack, calcsize, error, Struct
+import array
+from datetime import date
+import io
+import logging
 import os
+from struct import pack, unpack, calcsize, error, Struct
 import sys
-import time
-import array
 import tempfile
-import logging
-import io
-from datetime import date
+import time
 import zipfile

 # Create named logger
 logger = logging.getLogger(__name__)

-
 # Module settings
 VERBOSE = True

diff --git a/test_shapefile.py b/test_shapefile.py
index ec73b457..ad7ea087 100644
--- a/test_shapefile.py
+++ b/test_shapefile.py
@@ -1,7 +1,9 @@
 """
 This module tests the functionality of shapefile.py.
""" -# std lib imports + +import datetime +import json import os.path import sys if sys.version_info.major == 3: @@ -9,8 +11,6 @@ # third party imports import pytest -import json -import datetime if sys.version_info.major == 2: # required by pytest for python <36 from pathlib2 import Path From 5dc5fda50204336f6f791618cf6b4d94911bc850 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 12:41:27 +0100 Subject: [PATCH 045/115] Create .pre-commit-config.yaml --- .pre-commit-config.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..18f0b5dd --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,12 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: check-yaml + - id: trailing-whitespace +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) + From d759b98e11400d984ec4914dda915c1c94bd10c6 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 13:03:26 +0100 Subject: [PATCH 046/115] Run pre-commit hooks from build.yml Github Actions workflow --- .github/workflows/build.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dfeae580..b9f58955 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,7 +10,13 @@ on: workflow_dispatch: jobs: - build: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - uses: pre-commit/action@v3.0.1 + test: strategy: fail-fast: false From 677bce00a82f54af44088310771d43e9672d9f85 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 13:07:01 +0100 Subject: [PATCH 047/115] Run pre-commit hooks locally first --- .github/ISSUE_TEMPLATE/bug.yml | 8 +- .github/ISSUE_TEMPLATE/newfeature.yml | 2 +- .github/ISSUE_TEMPLATE/question.yml | 2 +- .github/ISSUE_TEMPLATE/unexpected.yml | 8 +- LICENSE.TXT | 2 +- README.md | 408 +++++++++++++------------- shapefile.py | 11 +- test_shapefile.py | 3 +- 8 files changed, 222 insertions(+), 222 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml index abd53835..aa7e47bc 100644 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ b/.github/ISSUE_TEMPLATE/bug.yml @@ -7,7 +7,7 @@ body: id: pyshp-version attributes: label: PyShp Version - description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. + description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. placeholder: ... validations: required: true @@ -15,7 +15,7 @@ body: id: python-version attributes: label: Python Version - description: Please input the version of the Python executable. + description: Please input the version of the Python executable. placeholder: ... validations: required: true @@ -23,7 +23,7 @@ body: id: your-code attributes: label: Your code - description: Please copy-paste the relevant parts of your code or script that triggered the error. + description: Please copy-paste the relevant parts of your code or script that triggered the error. placeholder: ... 
render: shell validations: @@ -41,7 +41,7 @@ body: id: notes attributes: label: Other notes - description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? + description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? placeholder: ... validations: required: false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/newfeature.yml b/.github/ISSUE_TEMPLATE/newfeature.yml index f35326d7..afb043aa 100644 --- a/.github/ISSUE_TEMPLATE/newfeature.yml +++ b/.github/ISSUE_TEMPLATE/newfeature.yml @@ -15,7 +15,7 @@ body: id: contribute attributes: label: Contributions - description: Would you be interested to contribute code that adds this functionality through a Pull Request? We gladly accept PRs - it's much faster and you'll be added a contributor. + description: Would you be interested to contribute code that adds this functionality through a Pull Request? We gladly accept PRs - it's much faster and you'll be added a contributor. options: - label: I am interested in implementing the described feature request and submit as a PR. required: false \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml index 76dfb689..d8c0cd09 100644 --- a/.github/ISSUE_TEMPLATE/question.yml +++ b/.github/ISSUE_TEMPLATE/question.yml @@ -6,7 +6,7 @@ body: - type: textarea id: question attributes: - label: What's your question? + label: What's your question? description: Please describe what you would like to know about PyShp, e.g. how to do something. placeholder: ... validations: diff --git a/.github/ISSUE_TEMPLATE/unexpected.yml b/.github/ISSUE_TEMPLATE/unexpected.yml index 07ed85c4..bf0a5778 100644 --- a/.github/ISSUE_TEMPLATE/unexpected.yml +++ b/.github/ISSUE_TEMPLATE/unexpected.yml @@ -7,7 +7,7 @@ body: id: pyshp-version attributes: label: PyShp Version - description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. + description: Please input the version of PyShp you used. If unsure, call `shapefile.__version__`. placeholder: ... validations: required: true @@ -15,7 +15,7 @@ body: id: python-version attributes: label: Python Version - description: Please input the version of the Python executable. + description: Please input the version of the Python executable. placeholder: ... validations: required: true @@ -23,7 +23,7 @@ body: id: your-code attributes: label: Your code - description: Please copy-paste the relevant parts of your code or script that you tried to run. + description: Please copy-paste the relevant parts of your code or script that you tried to run. placeholder: ... render: shell validations: @@ -48,7 +48,7 @@ body: id: notes attributes: label: Other notes - description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? + description: Please input any other notes that may be relevant, e.g. do you have any thoughts on what might be wrong? placeholder: ... 
validations: required: false \ No newline at end of file diff --git a/LICENSE.TXT b/LICENSE.TXT index ce33f7ba..d2b74462 100644 --- a/LICENSE.TXT +++ b/LICENSE.TXT @@ -1,5 +1,5 @@ The MIT License (MIT) - + Copyright © 2013 Joel Lawhead Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: diff --git a/README.md b/README.md index 94861bb2..8bfc869c 100644 --- a/README.md +++ b/README.md @@ -76,14 +76,14 @@ despite the numerous ways to store and exchange GIS data available today. Pyshp is compatible with Python 2.7-3.x. -This document provides examples for using PyShp to read and write shapefiles. However +This document provides examples for using PyShp to read and write shapefiles. However many more examples are continually added to the blog [http://GeospatialPython.com](http://GeospatialPython.com), -and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). +and by searching for PyShp on [https://gis.stackexchange.com](https://gis.stackexchange.com). Currently the sample census blockgroup shapefile referenced in the examples is available on the GitHub project site at [https://github.com/GeospatialPython/pyshp](https://github.com/GeospatialPython/pyshp). These examples are straight-forward and you can also easily run them against your -own shapefiles with minimal modification. +own shapefiles with minimal modification. Important: If you are new to GIS you should read about map projections. Please visit: [https://github.com/GeospatialPython/pyshp/wiki/Map-Projections](https://github.com/GeospatialPython/pyshp/wiki/Map-Projections) @@ -105,7 +105,7 @@ part of your geospatial project. ### New Features: -- Added support for pathlib and path-like shapefile filepaths (@mwtoews). +- Added support for pathlib and path-like shapefile filepaths (@mwtoews). - Allow reading individual file extensions via filepaths. ### Improvements: @@ -119,7 +119,7 @@ part of your geospatial project. - More robust handling of corrupt shapefiles (fixes #235) - Fix errors when writing to individual file-handles (fixes #237) - Revert previous decision to enforce geojson output ring orientation (detailed explanation at https://github.com/SciTools/cartopy/issues/2012) -- Fix test issues in environments without network access (@sebastic, @musicinmybrain). +- Fix test issues in environments without network access (@sebastic, @musicinmybrain). ## 2.2.0 @@ -132,7 +132,7 @@ part of your geospatial project. ### Improvements: -- More examples and restructuring of README. +- More examples and restructuring of README. - More informative Shape to geojson warnings (see #219). - Add shapefile.VERBOSE flag to control warnings verbosity (default True). - Shape object information when calling repr(). @@ -189,7 +189,7 @@ part of your geospatial project. ### New Features: -- Added back read/write support for unicode field names. +- Added back read/write support for unicode field names. - Improved Record representation - More support for geojson on Reader, ShapeRecord, ShapeRecords, and shapes() @@ -201,38 +201,38 @@ part of your geospatial project. 
## 2.0.0 -The newest version of PyShp, version 2.0 introduced some major new improvements. +The newest version of PyShp, version 2.0 introduced some major new improvements. A great thanks to all who have contributed code and raised issues, and for everyone's -patience and understanding during the transition period. -Some of the new changes are incompatible with previous versions. +patience and understanding during the transition period. +Some of the new changes are incompatible with previous versions. Users of the previous version 1.x should therefore take note of the following changes -(Note: Some contributor attributions may be missing): +(Note: Some contributor attributions may be missing): ### Major Changes: -- Full support for unicode text, with custom encoding, and exception handling. - - Means that the Reader returns unicode, and the Writer accepts unicode. -- PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. +- Full support for unicode text, with custom encoding, and exception handling. + - Means that the Reader returns unicode, and the Writer accepts unicode. +- PyShp has been simplified to a pure input-output library using the Reader and Writer classes, dropping the Editor class. - Switched to a new streaming approach when writing files, keeping memory-usage at a minimum: - - Specify filepath/destination and text encoding when creating the Writer. - - The file is written incrementally with each call to shape/record. - - Adding shapes is now done using dedicated methods for each shapetype. + - Specify filepath/destination and text encoding when creating the Writer. + - The file is written incrementally with each call to shape/record. + - Adding shapes is now done using dedicated methods for each shapetype. - Reading shapefiles is now more convenient: - - Shapefiles can be opened using the context manager, and files are properly closed. - - Shapefiles can be iterated, have a length, and supports the geo interface. + - Shapefiles can be opened using the context manager, and files are properly closed. + - Shapefiles can be iterated, have a length, and supports the geo interface. - New ways of inspecting shapefile metadata by printing. [@megies] - More convenient accessing of Record values as attributes. [@philippkraft] - - More convenient shape type name checking. [@megies] -- Add more support and documentation for MultiPatch 3D shapes. -- The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. -- Better documentation of previously unclear aspects, such as field types. + - More convenient shape type name checking. [@megies] +- Add more support and documentation for MultiPatch 3D shapes. +- The Reader "elevation" and "measure" attributes now renamed "zbox" and "mbox", to make it clear they refer to the min/max values. +- Better documentation of previously unclear aspects, such as field types. ### Important Fixes: - More reliable/robust: - Fixed shapefile bbox error for empty or point type shapefiles. [@mcuprjak] - Reading and writing Z and M type shapes is now more robust, fixing many errors, and has been added to the documentation. [@ShinNoNoir] - - Improved parsing of field value types, fixed errors and made more flexible. + - Improved parsing of field value types, fixed errors and made more flexible. 
- Fixed bug when writing shapefiles with datefield and date values earlier than 1900 [@megies] - Fix some geo interface errors, including checking polygon directions. - Bug fixes for reading from case sensitive file names, individual files separately, and from file-like objects. [@gastoneb, @kb003308, @erickskb] @@ -275,7 +275,7 @@ OR >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") OR any of the other 5+ formats which are potentially part of a shapefile. The -library does not care about file extensions. You can also specify that you only +library does not care about file extensions. You can also specify that you only want to read some of the file extensions through the use of keyword arguments: @@ -283,7 +283,7 @@ want to read some of the file extensions through the use of keyword arguments: #### Reading Shapefiles from Zip Files -If your shapefile is wrapped inside a zip file, the library is able to handle that too, meaning you don't have to worry about unzipping the contents: +If your shapefile is wrapped inside a zip file, the library is able to handle that too, meaning you don't have to worry about unzipping the contents: >>> sf = shapefile.Reader("shapefiles/blockgroups.zip") @@ -295,7 +295,7 @@ If the zip file contains multiple shapefiles, just specify which shapefile to re #### Reading Shapefiles from URLs -Finally, you can use all of the above methods to read shapefiles directly from the internet, by giving a url instead of a local path, e.g.: +Finally, you can use all of the above methods to read shapefiles directly from the internet, by giving a url instead of a local path, e.g.: >>> # from a zipped shapefile on website @@ -337,8 +337,8 @@ objects are properly closed when done reading the data: #### Reading Shapefile Meta-Data Shapefiles have a number of attributes for inspecting the file contents. -A shapefile is a container for a specific type of geometry, and this can be checked using the -shapeType attribute. +A shapefile is a container for a specific type of geometry, and this can be checked using the +shapeType attribute. >>> sf = shapefile.Reader("shapefiles/blockgroups.dbf") @@ -364,7 +364,7 @@ the existing shape types are not sequential: - POLYGONM = 25 - MULTIPOINTM = 28 - MULTIPATCH = 31 - + Based on this we can see that our blockgroups shapefile contains Polygon type shapes. The shape types are also defined as constants in the shapefile module, so that we can compare types more intuitively: @@ -378,8 +378,8 @@ For convenience, you can also get the name of the shape type as a string: >>> sf.shapeTypeName == 'POLYGON' True - -Other pieces of meta-data that we can check include the number of features + +Other pieces of meta-data that we can check include the number of features and the bounding box area the shapefile covers: @@ -387,10 +387,10 @@ and the bounding box area the shapefile covers: 663 >>> sf.bbox [-122.515048, 37.652916, -122.327622, 37.863433] - + Finally, if you would prefer to work with the entire shapefile in a different format, you can convert all of it to a GeoJSON dictionary, although you may lose -some information in the process, such as z- and m-values: +some information in the process, such as z- and m-values: >>> sf.__geo_interface__['type'] @@ -415,7 +415,7 @@ each shape record. >>> len(shapes) 663 - + To read a single shape by calling its index use the shape() method. The index is the shape's count from 0. So to read the 8th shape record you would use its index which is 7. 
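For instance, a minimal sketch (assuming the blockgroups reader `sf` from the examples above is still open):


    >>> s = sf.shape(7)
    >>> s.shapeTypeName
    'POLYGON'
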
@@ -457,12 +457,12 @@ shapeType Point do not have a bounding box 'bbox'. >>> shapes[3].shapeType 5 - * `shapeTypeName`: a string representation of the type of shape as defined by shapeType. Read-only. + * `shapeTypeName`: a string representation of the type of shape as defined by shapeType. Read-only. >>> shapes[3].shapeTypeName 'POLYGON' - + * `bbox`: If the shape type contains multiple points this tuple describes the lower left (x,y) coordinate and upper right corner coordinate creating a complete box around the points. If the shapeType is a @@ -496,7 +496,7 @@ shapeType Point do not have a bounding box 'bbox'. >>> ['%.3f' % coord for coord in shape] ['-122.471', '37.787'] -In most cases, however, if you need to do more than just type or bounds checking, you may want +In most cases, however, if you need to do more than just type or bounds checking, you may want to convert the geometry to the more human-readable [GeoJSON format](http://geojson.org), where lines and polygons are grouped for you: @@ -505,7 +505,7 @@ where lines and polygons are grouped for you: >>> geoj = s.__geo_interface__ >>> geoj["type"] 'MultiPolygon' - + The results from the shapes() method similarly supports converting to GeoJSON: @@ -514,12 +514,12 @@ The results from the shapes() method similarly supports converting to GeoJSON: Note: In some cases, if the conversion from shapefile geometry to GeoJSON encountered any problems or potential issues, a warning message will be displayed with information about the affected -geometry. To ignore or suppress these warnings, you can disable this behavior by setting the -module constant VERBOSE to False: +geometry. To ignore or suppress these warnings, you can disable this behavior by setting the +module constant VERBOSE to False: >>> shapefile.VERBOSE = False - + ### Reading Records @@ -534,12 +534,12 @@ You can call the "fields" attribute of the shapefile as a Python list. Each field is a Python list with the following information: * Field name: the name describing the data at this column index. - * Field type: the type of data at this column index. Types can be: + * Field type: the type of data at this column index. Types can be: * "C": Characters, text. * "N": Numbers, with or without decimals. * "F": Floats (same as "N"). - * "L": Logical, for boolean True/False values. - * "D": Dates. + * "L": Logical, for boolean True/False values. + * "D": Dates. * "M": Memo, has no meaning within a GIS and is part of the xbase spec instead. * Field length: the length of the data found at this column index. Older GIS software may truncate this length to 8 or 11 characters for "Character" @@ -571,11 +571,11 @@ attribute: ... ["UNITS3_9", "N", 8, 0], ["UNITS10_49", "N", 8, 0], ... ["UNITS50_UP", "N", 8, 0], ["MOBILEHOME", "N", 7, 0]] -The first field of a dbf file is always a 1-byte field called "DeletionFlag", -which indicates records that have been deleted but not removed. However, -since this flag is very rarely used, PyShp currently will return all records -regardless of their deletion flag, and the flag is also not included in the list of -record values. In other words, the DeletionFlag field has no real purpose, and +The first field of a dbf file is always a 1-byte field called "DeletionFlag", +which indicates records that have been deleted but not removed. However, +since this flag is very rarely used, PyShp currently will return all records +regardless of their deletion flag, and the flag is also not included in the list of +record values. 
In other words, the DeletionFlag field has no real purpose, and should in most cases be ignored. For instance, to get a list of all fieldnames: @@ -593,10 +593,10 @@ To read a single record call the record() method with the record's index: >>> rec = sf.record(3) - + Each record is a list-like Record object containing the values corresponding to each field in the field list (except the DeletionFlag). A record's values can be accessed by positional indexing or slicing. -For example in the blockgroups shapefile the 2nd and 3rd fields are the blockgroup id +For example in the blockgroups shapefile the 2nd and 3rd fields are the blockgroup id and the 1990 population count of that San Francisco blockgroup: @@ -604,7 +604,7 @@ and the 1990 population count of that San Francisco blockgroup: ['060750601001', 4715] For simpler access, the fields of a record can also accessed via the name of the field, -either as a key or as an attribute name. The blockgroup id (BKG_KEY) of the blockgroups shapefile +either as a key or as an attribute name. The blockgroup id (BKG_KEY) of the blockgroups shapefile can also be retrieved as: @@ -613,7 +613,7 @@ can also be retrieved as: >>> rec.BKG_KEY '060750601001' - + The record values can be easily integrated with other programs by converting it to a field-value dictionary: @@ -621,13 +621,13 @@ The record values can be easily integrated with other programs by converting it >>> sorted(dct.items()) [('AGE_18_29', 1467), ('AGE_30_49', 1681), ('AGE_50_64', 92), ('AGE_5_17', 848), ('AGE_65_UP', 30), ('AGE_UNDER5', 597), ('AMERI_ES', 6), ('AREA', 2.34385), ('ASIAN_PI', 452), ('BKG_KEY', '060750601001'), ('BLACK', 1007), ('DIVORCED', 149), ('FEMALES', 2095), ('FHH_CHILD', 16), ('HISPANIC', 416), ('HOUSEHOLDS', 1195), ('HSEHLD_1_F', 40), ('HSEHLD_1_M', 22), ('HSE_UNITS', 1258), ('MALES', 2620), ('MARHH_CHD', 79), ('MARHH_NO_C', 958), ('MARRIED', 2021), ('MEDIANRENT', 739), ('MEDIAN_VAL', 337500), ('MHH_CHILD', 0), ('MOBILEHOME', 0), ('NEVERMARRY', 703), ('OTHER', 288), ('OWNER_OCC', 66), ('POP1990', 4715), ('POP90_SQMI', 2011.6), ('RENTER_OCC', 3733), ('SEPARATED', 49), ('UNITS10_49', 49), ('UNITS2', 160), ('UNITS3_9', 672), ('UNITS50_UP', 0), ('UNITS_1ATT', 302), ('UNITS_1DET', 43), ('VACANT', 93), ('WHITE', 2962), ('WIDOWED', 37)] -If at a later point you need to check the record's index position in the original +If at a later point you need to check the record's index position in the original shapefile, you can do this through the "oid" attribute: >>> rec.oid 3 - + ### Reading Geometry and Records Simultaneously You may want to examine both the geometry and the attributes for a record at @@ -663,13 +663,13 @@ To get the 4th shape record from the blockgroups shapefile use the third index: >>> shapeRec = sf.shapeRecord(3) >>> shapeRec.record[1:3] ['060750601001', 4715] - + Each individual shape record also supports the _\_geo_interface\_\_ to convert it to a GeoJSON feature: >>> shapeRec.__geo_interface__['type'] 'Feature' - + ## Writing Shapefiles @@ -697,7 +697,7 @@ the file path and name to save to: >>> w = shapefile.Writer('shapefiles/test/testfile') >>> w.field('field1', 'C') - + File extensions are optional when reading or writing shapefiles. If you specify them PyShp ignores them anyway. When you save files you can specify a base file name that is used for all three file types. 
Or you can specify a name for @@ -706,9 +706,9 @@ one or more file types: >>> w = shapefile.Writer(dbf='shapefiles/test/onlydbf.dbf') >>> w.field('field1', 'C') - + In that case, any file types not assigned will not -save and only file types with file names will be saved. +save and only file types with file names will be saved. #### Writing Shapefiles to File-Like Objects @@ -738,14 +738,14 @@ write to them: >>> r = shapefile.Reader(shp=shp, shx=shx, dbf=dbf) >>> len(r) 1 - - + + #### Writing Shapefiles Using the Context Manager The "Writer" class automatically closes the open files and writes the final headers once it is garbage collected. -In case of a crash and to make the code more readable, it is nevertheless recommended -you do this manually by calling the "close()" method: +In case of a crash and to make the code more readable, it is nevertheless recommended +you do this manually by calling the "close()" method: >>> w.close() @@ -757,15 +757,15 @@ objects are properly closed and final headers written once you exit the with-cla >>> with shapefile.Writer("shapefiles/test/contextwriter") as w: ... w.field('field1', 'C') ... pass - + #### Setting the Shape Type The shape type defines the type of geometry contained in the shapefile. All of the shapes must match the shape type setting. -There are three ways to set the shape type: - * Set it when creating the class instance. - * Set it by assigning a value to an existing class instance. +There are three ways to set the shape type: + * Set it when creating the class instance. + * Set it by assigning a value to an existing class instance. * Set it automatically to the type of the first non-null shape by saving the shapefile. To manually set the shape type for a Writer object when creating the Writer: @@ -784,14 +784,14 @@ OR you can set it after the Writer is created: >>> w.shapeType 1 - + ### Adding Records -Before you can add records you must first create the fields that define what types of -values will go into each attribute. +Before you can add records you must first create the fields that define what types of +values will go into each attribute. -There are several different field types, all of which support storing None values as NULL. +There are several different field types, all of which support storing None values as NULL. Text fields are created using the 'C' type, and the third 'size' argument can be customized to the expected length of text values to save space: @@ -804,12 +804,12 @@ length of text values to save space: >>> w.null() >>> w.record('Hello', 'World', 'World'*50) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == ['Hello', 'World', 'World'*50] -Date fields are created using the 'D' type, and can be created using either -date objects, lists, or a YYYYMMDD formatted string. +Date fields are created using the 'D' type, and can be created using either +date objects, lists, or a YYYYMMDD formatted string. Field length or decimal have no impact on this type: @@ -825,18 +825,18 @@ Field length or decimal have no impact on this type: >>> w.record('19980130') >>> w.record(None) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == [date(1898,1,30)] >>> assert r.record(1) == [date(1998,1,30)] >>> assert r.record(2) == [date(1998,1,30)] >>> assert r.record(3) == [None] -Numeric fields are created using the 'N' type (or the 'F' type, which is exactly the same). 
-By default the fourth decimal argument is set to zero, essentially creating an integer field. -To store floats you must set the decimal argument to the precision of your choice. -To store very large numbers you must increase the field length size to the total number of digits -(including comma and minus). +Numeric fields are created using the 'N' type (or the 'F' type, which is exactly the same). +By default the fourth decimal argument is set to zero, essentially creating an integer field. +To store floats you must set the decimal argument to the precision of your choice. +To store very large numbers you must increase the field length size to the total number of digits +(including comma and minus). >>> w = shapefile.Writer('shapefiles/test/dtype') @@ -852,15 +852,15 @@ To store very large numbers you must increase the field length size to the total >>> w.record(INT=nr, LOWPREC=nr, MEDPREC=nr, HIGHPREC=-3.2302e-25, FTYPE=nr, LARGENR=int(nr)*10**100) >>> w.record(None, None, None, None, None, None) >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> assert r.record(0) == [1, 1.32, 1.3217328, -3.2302e-25, 1.3217328, 10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000] >>> assert r.record(1) == [None, None, None, None, None, None] - -Finally, we can create boolean fields by setting the type to 'L'. -This field can take True or False values, or 1 (True) or 0 (False). -None is interpreted as missing. + +Finally, we can create boolean fields by setting the type to 'L'. +This field can take True or False values, or 1 (True) or 0 (False). +None is interpreted as missing. >>> w = shapefile.Writer('shapefiles/test/dtype') @@ -878,7 +878,7 @@ None is interpreted as missing. >>> w.record(None) >>> w.record("Nonsense") >>> w.close() - + >>> r = shapefile.Reader('shapefiles/test/dtype') >>> r.record(0) Record #0: [True] @@ -892,7 +892,7 @@ None is interpreted as missing. Record #4: [None] >>> r.record(5) Record #5: [None] - + You can also add attributes using keyword arguments where the keys are field names. @@ -909,12 +909,12 @@ You can also add attributes using keyword arguments where the keys are field nam Geometry is added using one of several convenience methods. The "null" method is used for null shapes, "point" is used for point shapes, "multipoint" is used for multipoint shapes, "line" for lines, -"poly" for polygons. +"poly" for polygons. **Adding a Null shape** -A shapefile may contain some records for which geometry is not available, and may be set using the "null" method. -Because Null shape types (shape type 0) have no geometry the "null" method is called without any arguments. +A shapefile may contain some records for which geometry is not available, and may be set using the "null" method. +Because Null shape types (shape type 0) have no geometry the "null" method is called without any arguments. >>> w = shapefile.Writer('shapefiles/test/null') @@ -928,59 +928,59 @@ Because Null shape types (shape type 0) have no geometry the "null" method is ca **Adding a Point shape** Point shapes are added using the "point" method. A point is specified by an x and -y value. +y value. >>> w = shapefile.Writer('shapefiles/test/point') >>> w.field('name', 'C') - - >>> w.point(122, 37) + + >>> w.point(122, 37) >>> w.record('point1') - + >>> w.close() **Adding a MultiPoint shape** -If your point data allows for the possibility of multiple points per feature, use "multipoint" instead. -These are specified as a list of xy point coordinates. 
+If your point data allows for the possibility of multiple points per feature, use "multipoint" instead. +These are specified as a list of xy point coordinates. >>> w = shapefile.Writer('shapefiles/test/multipoint') >>> w.field('name', 'C') - - >>> w.multipoint([[122,37], [124,32]]) + + >>> w.multipoint([[122,37], [124,32]]) >>> w.record('multipoint1') - + >>> w.close() - + **Adding a LineString shape** -For LineString shapefiles, each shape is given as a list of one or more linear features. -Each of the linear features must have at least two points. - - +For LineString shapefiles, each shape is given as a list of one or more linear features. +Each of the linear features must have at least two points. + + >>> w = shapefile.Writer('shapefiles/test/line') >>> w.field('name', 'C') - + >>> w.line([ ... [[1,5],[5,5],[5,1],[3,3],[1,1]], # line 1 ... [[3,2],[2,6]] # line 2 ... ]) - + >>> w.record('linestring1') - + >>> w.close() - + **Adding a Polygon shape** Similarly to LineString, Polygon shapes consist of multiple polygons, and must be given as a list of polygons. -The main difference is that polygons must have at least 4 points and the last point must be the same as the first. +The main difference is that polygons must have at least 4 points and the last point must be the same as the first. It's also okay if you forget to repeat the first point at the end; PyShp automatically checks and closes the polygons if you don't. It's important to note that for Polygon shapefiles, your polygon coordinates must be ordered in a clockwise direction. If any of the polygons have holes, then the hole polygon coordinates must be ordered in a counterclockwise direction. -The direction of your polygons determines how shapefile readers will distinguish between polygon outlines and holes. +The direction of your polygons determines how shapefile readers will distinguish between polygon outlines and holes. >>> w = shapefile.Writer('shapefiles/test/polygon') @@ -992,13 +992,13 @@ The direction of your polygons determines how shapefile readers will distinguish ... [[15,2], [17,6], [22,7]] # poly 2 ... ]) >>> w.record('polygon1') - + >>> w.close() - + **Adding from an existing Shape object** Finally, geometry can be added by passing an existing "Shape" object to the "shape" method. -You can also pass it any GeoJSON dictionary or _\_geo_interface\_\_ compatible object. +You can also pass it any GeoJSON dictionary or _\_geo_interface\_\_ compatible object. This can be particularly useful for copying from one file to another: @@ -1011,14 +1011,14 @@ This can be particularly useful for copying from one file to another: >>> for shaperec in r.iterShapeRecords(): ... w.record(*shaperec.record) ... w.shape(shaperec.shape) - + >>> # or GeoJSON dicts >>> for shaperec in r.iterShapeRecords(): ... w.record(*shaperec.record) ... w.shape(shaperec.shape.__geo_interface__) - - >>> w.close() - + + >>> w.close() + ### Geometry and Record Balancing @@ -1027,17 +1027,17 @@ number of records equals the number of shapes to create a valid shapefile. You must take care to add records and shapes in the same order so that the record data lines up with the geometry data. 
For example: - + >>> w = shapefile.Writer('shapefiles/test/balancing', shapeType=shapefile.POINT) >>> w.field("field1", "C") >>> w.field("field2", "C") - + >>> w.record("row", "one") >>> w.point(1, 1) - + >>> w.record("row", "two") >>> w.point(2, 2) - + To help prevent accidental misalignment PyShp has an "auto balance" feature to make sure when you add either a shape or a record the two sides of the equation line up. This way if you forget to update an entry the @@ -1050,7 +1050,7 @@ the attribute autoBalance to 1 or True: >>> w.record("row", "three") >>> w.record("row", "four") >>> w.point(4, 4) - + >>> w.recNum == w.shpNum True @@ -1059,7 +1059,7 @@ to ensure the other side is up to date. When balancing is used null shapes are created on the geometry side or records with a value of "NULL" for each field is created on the attribute side. This gives you flexibility in how you build the shapefile. -You can create all of the shapes and then create all of the records or vice versa. +You can create all of the shapes and then create all of the records or vice versa. >>> w.autoBalance = 0 @@ -1069,16 +1069,16 @@ You can create all of the shapes and then create all of the records or vice vers >>> w.point(5, 5) >>> w.point(6, 6) >>> w.balance() - + >>> w.recNum == w.shpNum True If you do not use the autoBalance() or balance() method and forget to manually balance the geometry and attributes the shapefile will be viewed as corrupt by most shapefile software. - + ### Writing .prj files -A .prj file, or projection file, is a simple text file that stores a shapefile's map projection and coordinate reference system to help mapping software properly locate the geometry on a map. If you don't have one, you may get confusing errors when you try and use the shapefile you created. The GIS software may complain that it doesn't know the shapefile's projection and refuse to accept it, it may assume the shapefile is the same projection as the rest of your GIS project and put it in the wrong place, or it might assume the coordinates are an offset in meters from latitude and longitude 0,0 which will put your data in the middle of the ocean near Africa. The text in the .prj file is a [Well-Known-Text (WKT) projection string](https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems). Projection strings can be quite long so they are often referenced using numeric codes call EPSG codes. The .prj file must have the same base name as your shapefile. So for example if you have a shapefile named "myPoints.shp", the .prj file must be named "myPoints.prj". +A .prj file, or projection file, is a simple text file that stores a shapefile's map projection and coordinate reference system to help mapping software properly locate the geometry on a map. If you don't have one, you may get confusing errors when you try and use the shapefile you created. The GIS software may complain that it doesn't know the shapefile's projection and refuse to accept it, it may assume the shapefile is the same projection as the rest of your GIS project and put it in the wrong place, or it might assume the coordinates are an offset in meters from latitude and longitude 0,0 which will put your data in the middle of the ocean near Africa. The text in the .prj file is a [Well-Known-Text (WKT) projection string](https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems). Projection strings can be quite long so they are often referenced using numeric codes call EPSG codes. 
The .prj file must have the same base name as your shapefile. So for example if you have a shapefile named "myPoints.shp", the .prj file must be named "myPoints.prj". If you're using the same projection over and over, the following is a simple way to create the .prj file assuming your base filename is stored in a variable called "filename": @@ -1092,17 +1092,17 @@ If you're using the same projection over and over, the following is a simple way prj.write(wkt) ``` -If you need to dynamically fetch WKT projection strings, you can use the pure Python [PyCRS](https://github.com/karimbahgat/PyCRS) module which has a number of useful features. +If you need to dynamically fetch WKT projection strings, you can use the pure Python [PyCRS](https://github.com/karimbahgat/PyCRS) module which has a number of useful features. # Advanced Use ## Common Errors and Fixes -Below we list some commonly encountered errors and ways to fix them. +Below we list some commonly encountered errors and ways to fix them. ### Warnings and Logging -By default, PyShp chooses to be transparent and provide the user with logging information and warnings about non-critical issues when reading or writing shapefiles. This behavior is controlled by the module constant `VERBOSE` (which defaults to True). If you would rather suppress this information, you can simply set this to False: +By default, PyShp chooses to be transparent and provide the user with logging information and warnings about non-critical issues when reading or writing shapefiles. This behavior is controlled by the module constant `VERBOSE` (which defaults to True). If you would rather suppress this information, you can simply set this to False: >>> shapefile.VERBOSE = False @@ -1115,21 +1115,21 @@ All logging happens under the namespace `shapefile`. So another way to suppress ### Shapefile Encoding Errors -PyShp supports reading and writing shapefiles in any language or character encoding, and provides several options for decoding and encoding text. -Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't have to specify the encoding. -If you encounter an encoding error when reading a shapefile, this means the shapefile was likely written in a non-utf8 encoding. +PyShp supports reading and writing shapefiles in any language or character encoding, and provides several options for decoding and encoding text. +Most shapefiles are written in UTF-8 encoding, PyShp's default encoding, so in most cases you don't have to specify the encoding. +If you encounter an encoding error when reading a shapefile, this means the shapefile was likely written in a non-utf8 encoding. For instance, when working with English language shapefiles, a common reason for encoding errors is that the shapefile was written in Latin-1 encoding. -For reading shapefiles in any non-utf8 encoding, such as Latin-1, just -supply the encoding option when creating the Reader class. +For reading shapefiles in any non-utf8 encoding, such as Latin-1, just +supply the encoding option when creating the Reader class. >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="latin1") >>> r.record(0) == [2, u'Ñandú'] True - -Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such -as UTF-8. Assuming the new encoding supports the characters you are trying to write, reading it back in -should give you the same unicode string you started with. 
+ +Once you have loaded the shapefile, you may choose to save it using another more supportive encoding such +as UTF-8. Assuming the new encoding supports the characters you are trying to write, reading it back in +should give you the same unicode string you started with. >>> w = shapefile.Writer("shapefiles/test/latin_as_utf8.shp", encoding="utf8") @@ -1137,15 +1137,15 @@ should give you the same unicode string you started with. >>> w.record(*r.record(0)) >>> w.null() >>> w.close() - + >>> r = shapefile.Reader("shapefiles/test/latin_as_utf8.shp", encoding="utf8") >>> r.record(0) == [2, u'Ñandú'] True - + If you supply the wrong encoding and the string is unable to be decoded, PyShp will by default raise an exception. If however, on rare occasion, you are unable to find the correct encoding and want to ignore or replace encoding errors, you can specify the "encodingErrors" to be used by the decode method. This -applies to both reading and writing. +applies to both reading and writing. >>> r = shapefile.Reader("shapefiles/test/latin1.shp", encoding="ascii", encodingErrors="replace") @@ -1156,8 +1156,8 @@ applies to both reading and writing. ## Reading Large Shapefiles -Despite being a lightweight library, PyShp is designed to be able to read shapefiles of any size, allowing you to work with hundreds of thousands or even millions -of records and complex geometries. +Despite being a lightweight library, PyShp is designed to be able to read shapefiles of any size, allowing you to work with hundreds of thousands or even millions +of records and complex geometries. ### Iterating through a shapefile @@ -1167,22 +1167,22 @@ As an example, let's load this Natural Earth shapefile of more than 4000 global >>> sf = shapefile.Reader("https://github.com/nvkelso/natural-earth-vector/blob/master/10m_cultural/ne_10m_admin_1_states_provinces?raw=true") When first creating the Reader class, the library only reads the header information -and leaves the rest of the file contents alone. Once you call the records() and shapes() -methods however, it will attempt to read the entire file into memory at once. +and leaves the rest of the file contents alone. Once you call the records() and shapes() +methods however, it will attempt to read the entire file into memory at once. For very large files this can result in MemoryError. So when working with large files it is recommended to use instead the iterShapes(), iterRecords(), or iterShapeRecords() -methods instead. These iterate through the file contents one at a time, enabling you to loop -through them while keeping memory usage at a minimum. +methods instead. These iterate through the file contents one at a time, enabling you to loop +through them while keeping memory usage at a minimum. >>> for shape in sf.iterShapes(): ... # do something here ... pass - + >>> for rec in sf.iterRecords(): ... # do something here ... pass - + >>> for shapeRec in sf.iterShapeRecords(): ... # do something here ... pass @@ -1202,7 +1202,7 @@ By default when reading the attribute records of a shapefile, pyshp unpacks and ... pass >>> rec Record #4595: ['Birgu', 'Malta'] - + ### Attribute filtering In many cases, we aren't interested in all entries of a shapefile, but rather only want to retrieve a small subset of records by filtering on some attribute. 
To avoid wasting time reading records and shapes that we don't need, we can start by iterating only the records and fields of interest, check if the record matches some condition as a way to filter the data, and finally load the full record and shape geometry for those that meet the condition: @@ -1222,7 +1222,7 @@ In many cases, we aren't interested in all entries of a shapefile, but rather on 'Maekel' 'Anseba' -Selectively reading only the necessary data in this way is particularly useful for efficiently processing a limited subset of data from very large files or when looping through a large number of files, especially if they contain large attribute tables or complex shape geometries. +Selectively reading only the necessary data in this way is particularly useful for efficiently processing a limited subset of data from very large files or when looping through a large number of files, especially if they contain large attribute tables or complex shape geometries. ### Spatial filtering @@ -1253,23 +1253,23 @@ Another common use-case is that we only want to read those records that are loca Record #2037: ['Al Hudaydah', 'Yemen'] Record #3741: ['Anseba', 'Eritrea'] -This functionality means that shapefiles can be used as a bare-bones spatially indexed database, with very fast bounding box queries for even the largest of shapefiles. Note that, as with all spatial indexing, this method does not guarantee that the *geometries* of the resulting matches overlap the queried region, only that their *bounding boxes* overlap. +This functionality means that shapefiles can be used as a bare-bones spatially indexed database, with very fast bounding box queries for even the largest of shapefiles. Note that, as with all spatial indexing, this method does not guarantee that the *geometries* of the resulting matches overlap the queried region, only that their *bounding boxes* overlap. ## Writing large shapefiles -Similar to the Reader class, the shapefile Writer class uses a streaming approach to keep memory -usage at a minimum and allow writing shapefiles of arbitrarily large sizes. The library takes care of this under-the-hood by immediately -writing each geometry and record to disk the moment they -are added using shape() or record(). Once the writer is closed, exited, or garbage -collected, the final header information is calculated and written to the beginning of -the file. +Similar to the Reader class, the shapefile Writer class uses a streaming approach to keep memory +usage at a minimum and allow writing shapefiles of arbitrarily large sizes. The library takes care of this under-the-hood by immediately +writing each geometry and record to disk the moment they +are added using shape() or record(). Once the writer is closed, exited, or garbage +collected, the final header information is calculated and written to the beginning of +the file. ### Merging multiple shapefiles -This means that it's possible to merge hundreds or thousands of shapefiles, as -long as you iterate through the source files to avoid loading everything into +This means that it's possible to merge hundreds or thousands of shapefiles, as +long as you iterate through the source files to avoid loading everything into memory. The following example copies the contents of a shapefile to a new file 10 times: >>> # create writer @@ -1295,12 +1295,12 @@ memory. 
The following example copies the contents of a shapefile to a new file 1 >>> # close the writer >>> w.close() -In this trivial example, we knew that all files had the exact same field names, ordering, and types. In other scenarios, you will have to additionally make sure that all shapefiles have the exact same fields in the same order, and that they all contain the same geometry type. +In this trivial example, we knew that all files had the exact same field names, ordering, and types. In other scenarios, you will have to additionally make sure that all shapefiles have the exact same fields in the same order, and that they all contain the same geometry type. ### Editing shapefiles -If you need to edit a shapefile you would have to read the -file one record at a time, modify or filter the contents, and write it back out. For instance, to create a copy of a shapefile that only keeps a subset of relevant fields: +If you need to edit a shapefile you would have to read the +file one record at a time, modify or filter the contents, and write it back out. For instance, to create a copy of a shapefile that only keeps a subset of relevant fields: >>> # create writer >>> w = shapefile.Writer('shapefiles/test/edit') @@ -1325,7 +1325,7 @@ file one record at a time, modify or filter the contents, and write it back out. ## 3D and Other Geometry Types Most shapefiles store conventional 2D points, lines, or polygons. But the shapefile format is also capable -of storing various other types of geometries as well, including complex 3D surfaces and objects. +of storing various other types of geometries as well, including complex 3D surfaces and objects. ### Shapefiles with measurement (M) values @@ -1338,107 +1338,107 @@ or by simply omitting the third M-coordinate. >>> w = shapefile.Writer('shapefiles/test/linem') >>> w.field('name', 'C') - + >>> w.linem([ ... [[1,5,0],[5,5],[5,1,3],[3,3,None],[1,1,0]], # line with one omitted and one missing M-value ... [[3,2],[2,6]] # line without any M-values ... ]) - + >>> w.record('linem1') - + >>> w.close() - + Shapefiles containing M-values can be examined in several ways: >>> r = shapefile.Reader('shapefiles/test/linem') - + >>> r.mbox # the lower and upper bound of M-values in the shapefile [0.0, 3.0] - + >>> r.shape(0).m # flat list of M-values [0.0, None, 3.0, None, 0.0, None, None] - + ### Shapefiles with elevation (Z) values -Elevation shape types are shapes that include an elevation value at each vertex, for instance elevation from a GPS device. -Shapes with elevation (Z) values are added with the following methods: "pointz", "multipointz", "linez", and "polyz". +Elevation shape types are shapes that include an elevation value at each vertex, for instance elevation from a GPS device. +Shapes with elevation (Z) values are added with the following methods: "pointz", "multipointz", "linez", and "polyz". The Z-values are specified by adding a third Z value to each XY coordinate. Z-values do not support the concept of missing data, but if you omit the third Z-coordinate it will default to 0. Note that Z-type shapes also support measurement (M) values added -as a fourth M-coordinate. This too is optional. - - +as a fourth M-coordinate. This too is optional. + + >>> w = shapefile.Writer('shapefiles/test/linez') >>> w.field('name', 'C') - + >>> w.linez([ ... [[1,5,18],[5,5,20],[5,1,22],[3,3],[1,1]], # line with some omitted Z-values ... [[3,2],[2,6]], # line without any Z-values ... [[3,2,15,0],[2,6,13,3],[1,9,14,2]] # line with both Z- and M-values ... 
]) - + >>> w.record('linez1') - + >>> w.close() - + To examine a Z-type shapefile you can do: >>> r = shapefile.Reader('shapefiles/test/linez') - + >>> r.zbox # the lower and upper bound of Z-values in the shapefile [0.0, 22.0] - + >>> r.shape(0).z # flat list of Z-values [18.0, 20.0, 22.0, 0.0, 0.0, 0.0, 0.0, 15.0, 13.0, 14.0] ### 3D MultiPatch Shapefiles -Multipatch shapes are useful for storing composite 3-Dimensional objects. +Multipatch shapes are useful for storing composite 3-Dimensional objects. A MultiPatch shape represents a 3D object made up of one or more surface parts. Each surface in "parts" is defined by a list of XYZM values (Z and M values optional), and its corresponding type is -given in the "partTypes" argument. The part type decides how the coordinate sequence is to be interpreted, and can be one +given in the "partTypes" argument. The part type decides how the coordinate sequence is to be interpreted, and can be one of the following module constants: TRIANGLE_STRIP, TRIANGLE_FAN, OUTER_RING, INNER_RING, FIRST_RING, or RING. -For instance, a TRIANGLE_STRIP may be used to represent the walls of a building, combined with a TRIANGLE_FAN to represent -its roof: +For instance, a TRIANGLE_STRIP may be used to represent the walls of a building, combined with a TRIANGLE_FAN to represent +its roof: >>> from shapefile import TRIANGLE_STRIP, TRIANGLE_FAN - + >>> w = shapefile.Writer('shapefiles/test/multipatch') >>> w.field('name', 'C') - + >>> w.multipatch([ ... [[0,0,0],[0,0,3],[5,0,0],[5,0,3],[5,5,0],[5,5,3],[0,5,0],[0,5,3],[0,0,0],[0,0,3]], # TRIANGLE_STRIP for house walls ... [[2.5,2.5,5],[0,0,3],[5,0,3],[5,5,3],[0,5,3],[0,0,3]], # TRIANGLE_FAN for pointed house roof ... ], ... partTypes=[TRIANGLE_STRIP, TRIANGLE_FAN]) # one type for each part - + >>> w.record('house1') - + >>> w.close() - + For an introduction to the various multipatch part types and examples of how to create 3D MultiPatch objects see [this -ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). +ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiPatch_Geometry_Type.pdf). + - # Testing -The testing framework is pytest, and the tests are located in test_shapefile.py. -This includes an extensive set of unit tests of the various pyshp features, -and tests against various input data. Some of the tests that require -internet connectivity will be skipped in offline testing environments. -In the same folder as README.md and shapefile.py, from the command line run +The testing framework is pytest, and the tests are located in test_shapefile.py. +This includes an extensive set of unit tests of the various pyshp features, +and tests against various input data. Some of the tests that require +internet connectivity will be skipped in offline testing environments. +In the same folder as README.md and shapefile.py, from the command line run ``` $ python -m pytest -``` +``` -Additionally, all the code and examples located in this file, README.md, +Additionally, all the code and examples located in this file, README.md, is tested and verified with the builtin doctest framework. A special routine for invoking the doctest is run when calling directly on shapefile.py. 
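A comparable check can be scripted with the standard library alone. The
following is a minimal sketch, not PyShp's actual doctest runner, and it
assumes README.md sits in the current working directory:

```
import doctest

# Execute every ">>> " example found in README.md; module_relative=False
# lets a plain filesystem path be passed instead of a package-relative one.
failures, attempted = doctest.testfile("README.md", module_relative=False)
print("%d of %d examples failed" % (failures, attempted))
```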
-In the same folder as README.md and shapefile.py, from the command line run +In the same folder as README.md and shapefile.py, from the command line run ``` $ python shapefile.py -``` +``` Linux/Mac and similar platforms will need to run `$ dos2unix README.md` in order to correct line endings in README.md. diff --git a/shapefile.py b/shapefile.py index cace43fc..b218103f 100644 --- a/shapefile.py +++ b/shapefile.py @@ -9,15 +9,15 @@ __version__ = "2.3.1" import array -from datetime import date import io import logging import os -from struct import pack, unpack, calcsize, error, Struct import sys import tempfile import time import zipfile +from datetime import date +from struct import Struct, calcsize, error, pack, unpack # Create named logger logger = logging.getLogger(__name__) @@ -81,16 +81,15 @@ xrange = range izip = zip - from urllib.parse import urlparse, urlunparse from urllib.error import HTTPError - from urllib.request import urlopen, Request + from urllib.parse import urlparse, urlunparse + from urllib.request import Request, urlopen else: from itertools import izip + from urllib2 import HTTPError, Request, urlopen from urlparse import urlparse, urlunparse - from urllib2 import HTTPError - from urllib2 import urlopen, Request # Helpers diff --git a/test_shapefile.py b/test_shapefile.py index ad7ea087..ca1e92d8 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -6,11 +6,13 @@ import json import os.path import sys + if sys.version_info.major == 3: from pathlib import Path # third party imports import pytest + if sys.version_info.major == 2: # required by pytest for python <36 from pathlib2 import Path @@ -18,7 +20,6 @@ # our imports import shapefile - # define various test shape tuples of (type, points, parts indexes, and expected geo interface output) geo_interface_tests = [ (shapefile.POINT, # point [(1,1)], From edcae5e6944bcb8d48e0c87e1ed15c0a0853b76c Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:11:53 +0100 Subject: [PATCH 048/115] Run tests via a reuseable custom Action. Let requirements.test.txt define Pytest version etc. (remove repeated installation). --- .github/actions/test/action.yml | 73 +++++++++++++++++++++++++++++++++ .github/workflows/build.yml | 32 ++++++--------- 2 files changed, 86 insertions(+), 19 deletions(-) create mode 100644 .github/actions/test/action.yml diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml new file mode 100644 index 00000000..ebb7f437 --- /dev/null +++ b/.github/actions/test/action.yml @@ -0,0 +1,73 @@ +name: + Test + +description: + Run pytest, and run the doctest runner (shapefile.py as a script). + +inputs: + python_version: + required: false + default: "3.13" + description: 'The Python version to run the tests on. ' + + +runs: + using: "composite" + steps: + # The Repo is required to already be checked out, e.g. by the calling workflow + - uses: LizardByte/setup-python-action@master + with: + python-version: ${{ inputs.python_version }} + + - name: Doctests + run: python shapefile.py + + - name: Install test dependencies. + run: | + python -m pip install --upgrade pip + pip install -r requirements.test.txt + + - name: Pytest + run: | + pytest + + - name: Show versions. + shell: ${{ inputs.shell }} + working-directory: '.\sDNA\sdna_vs2008\tests\pytest' + run: | + python --version + python -c "import numpy; print(numpy.__version__)" + python -m pytest --version + + + # - name: Run debug_test.py directly. 
+ # shell: cmd + # working-directory: '.\sDNA\sdna_vs2008\tests' + # # run: python -u debug_test.py + # run: set "sdnadll=c:\Program Files (x86)\sDNA\x64\sdna_vs2008.dll" & python -u debug_test.py + + # - name: Run diff test number 0 directly via + # test_gathering_windows_command_line_tests.py, by test index + # working-directory: '.\sDNA\sdna_vs2008\tests\pytest' + # run: python test_gathering_windows_command_line_tests.py 0 + + - name: Run all the diff tests with Pytest. + shell: ${{ inputs.shell }} + working-directory: .\sDNA\sdna_vs2008\tests\pytest + env: + DONT_TEST_N_LINK_SUBSYSTEMS_ORDER: ${{ inputs.DONT_TEST_N_LINK_SUBSYSTEMS_ORDER }} + ALLOW_NEGATIVE_FORMULA_ERROR_ON_ANY_LINK_PRESENT: ${{ inputs.ALLOW_NEGATIVE_FORMULA_ERROR_ON_ANY_LINK_PRESENT }} + sdnadll: ${{ inputs.sdnadll }} + sdna_bin_dir: ${{ inputs.sdna_bin_dir }} + sdna_debug: ${{ inputs.sdna_debug }} + run: pytest -rA + # -rA shows summary of tests, one per line, even when they all pass. + # + # To run the same tests locally, the command is something like: + # run: > + # set "sdnadll=c:\Program Files (x86)\sDNA\x64\sdna_vs2008.dll" & + # set "sdna_bin_dir=c:\Program Files (x86)\sDNA\bin" & + # set sdna_debug=False & + # set DONT_TEST_N_LINK_SUBSYSTEMS_ORDER=1 & + # set ALLOW_NEGATIVE_FORMULA_ERROR_ON_ANY_LINK_PRESENT=1 & + # pytest -rA \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b9f58955..9908e619 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,5 +1,4 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions +# This workflow will run the pre-commit hooks (including linters), and the tests with a variety of Python versions name: build @@ -16,8 +15,8 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 - test: + test: strategy: fail-fast: false matrix: @@ -33,21 +32,16 @@ jobs: "3.12", "3.13.0a2", ] - - runs-on: ubuntu-latest - container: - image: python:${{ matrix.python-version }}-slim - + os: [ + "ubuntu-latest", + "windows-latest", + "macos-latest", + ] + runs-on: matrix.os steps: - uses: actions/checkout@v4 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install pytest - if [ -f requirements.test.txt ]; then pip install -r requirements.test.txt; fi - - name: Test with doctest - run: | - python shapefile.py - - name: Test with pytest - run: | - pytest + + - name: Run tests + uses: ./.github/actions/test + with: + python_version: ${{ matrix.python_version }} From 18354e237d1376280a04d4c911112fd9cafda5a8 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:26:31 +0100 Subject: [PATCH 049/115] Don't test all Python versions on Windows and MacOS due to Github hosted runner limits. 
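A rough sense of the arithmetic behind this change: a test matrix multiplies
out, so running every interpreter on every OS quickly exhausts hosted-runner
quotas. A minimal sketch of that multiplication, assuming the ten-version
list used elsewhere in this series across three operating systems:

```
from itertools import product

python_versions = ["2.7", "3.5", "3.6", "3.7", "3.8",
                   "3.9", "3.10", "3.11", "3.12", "3.13.0a2"]
operating_systems = ["ubuntu-latest", "windows-latest", "macos-latest"]

# Each (version, os) pair would become one hosted-runner job per push.
jobs = list(product(python_versions, operating_systems))
print(len(jobs))  # 30
```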
--- .github/workflows/build.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9908e619..8cdf134c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -34,9 +34,13 @@ jobs: ] os: [ "ubuntu-latest", - "windows-latest", - "macos-latest", ] + include: + - os: "windows-latest" + python-version: "3.13" + - os: "macos-latest" + python-version: "3.13" + runs-on: matrix.os steps: - uses: actions/checkout@v4 From de4f6afef6f61557c627749fbb9b781576de2528 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:28:24 +0100 Subject: [PATCH 050/115] Don't test on Windows or MacOS at all yet. --- .github/workflows/build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8cdf134c..3538aa94 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -35,11 +35,11 @@ jobs: os: [ "ubuntu-latest", ] - include: - - os: "windows-latest" - python-version: "3.13" - - os: "macos-latest" - python-version: "3.13" + # include: + # - os: "windows-latest" + # python-version: "3.13" + # - os: "macos-latest" + # python-version: "3.13" runs-on: matrix.os steps: From a79a6125b2c6ef99dcf4c58cba8c1b1c040ee1a4 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:42:32 +0100 Subject: [PATCH 051/115] Run Python version tests in containers. --- .github/workflows/build.yml | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3538aa94..16f11e36 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,7 @@ jobs: - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 - test: + test_in_slim_Python_containers: strategy: fail-fast: false matrix: @@ -32,15 +32,40 @@ jobs: "3.12", "3.13.0a2", ] + + runs-on: ubuntu-latest + container: + image: python:${{ matrix.python-version }}-slim + + steps: + - uses: actions/checkout@v4 + + - name: Run tests + uses: ./.github/actions/test + with: + python_version: ${{ matrix.python_version }} + + + test_on_MacOS_and_Windows: + + strategy: + fail-fast: false + matrix: + python-version: [ + "3.12", + ] os: [ - "ubuntu-latest", + "macos-latest", + "windows-latest", ] + # include: # - os: "windows-latest" # python-version: "3.13" # - os: "macos-latest" # python-version: "3.13" + runs-on: matrix.os steps: - uses: actions/checkout@v4 From a1e7642bdf6ec5a202179784ee1b33c1f3c04691 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 15:44:40 +0100 Subject: [PATCH 052/115] Update action.yml --- .github/actions/test/action.yml | 37 +-------------------------------- 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index ebb7f437..ec03ac84 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -31,43 +31,8 @@ runs: run: | pytest - - name: Show versions. - shell: ${{ inputs.shell }} - working-directory: '.\sDNA\sdna_vs2008\tests\pytest' + - name: Show versions for logs. run: | python --version - python -c "import numpy; print(numpy.__version__)" python -m pytest --version - - # - name: Run debug_test.py directly. 
- # shell: cmd - # working-directory: '.\sDNA\sdna_vs2008\tests' - # # run: python -u debug_test.py - # run: set "sdnadll=c:\Program Files (x86)\sDNA\x64\sdna_vs2008.dll" & python -u debug_test.py - - # - name: Run diff test number 0 directly via - # test_gathering_windows_command_line_tests.py, by test index - # working-directory: '.\sDNA\sdna_vs2008\tests\pytest' - # run: python test_gathering_windows_command_line_tests.py 0 - - - name: Run all the diff tests with Pytest. - shell: ${{ inputs.shell }} - working-directory: .\sDNA\sdna_vs2008\tests\pytest - env: - DONT_TEST_N_LINK_SUBSYSTEMS_ORDER: ${{ inputs.DONT_TEST_N_LINK_SUBSYSTEMS_ORDER }} - ALLOW_NEGATIVE_FORMULA_ERROR_ON_ANY_LINK_PRESENT: ${{ inputs.ALLOW_NEGATIVE_FORMULA_ERROR_ON_ANY_LINK_PRESENT }} - sdnadll: ${{ inputs.sdnadll }} - sdna_bin_dir: ${{ inputs.sdna_bin_dir }} - sdna_debug: ${{ inputs.sdna_debug }} - run: pytest -rA - # -rA shows summary of tests, one per line, even when they all pass. - # - # To run the same tests locally, the command is something like: - # run: > - # set "sdnadll=c:\Program Files (x86)\sDNA\x64\sdna_vs2008.dll" & - # set "sdna_bin_dir=c:\Program Files (x86)\sDNA\bin" & - # set sdna_debug=False & - # set DONT_TEST_N_LINK_SUBSYSTEMS_ORDER=1 & - # set ALLOW_NEGATIVE_FORMULA_ERROR_ON_ANY_LINK_PRESENT=1 & - # pytest -rA \ No newline at end of file From 67206790b2598680ec338757df049aabeeecd9eb Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:01:32 +0100 Subject: [PATCH 053/115] Drop action input varibles. Use official setup-python action. --- .github/actions/test/action.yml | 9 +-------- .github/workflows/build.yml | 7 +++---- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index ec03ac84..92d314ce 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -4,20 +4,13 @@ name: description: Run pytest, and run the doctest runner (shapefile.py as a script). -inputs: - python_version: - required: false - default: "3.13" - description: 'The Python version to run the tests on. ' runs: using: "composite" steps: # The Repo is required to already be checked out, e.g. 
by the calling workflow - - uses: LizardByte/setup-python-action@master - with: - python-version: ${{ inputs.python_version }} + # The Python to be tested with is required to already be setup, with "python" and "pip" on the system Path - name: Doctests run: python shapefile.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 16f11e36..231dbcd1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,8 +42,6 @@ jobs: - name: Run tests uses: ./.github/actions/test - with: - python_version: ${{ matrix.python_version }} test_on_MacOS_and_Windows: @@ -70,7 +68,8 @@ jobs: steps: - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python_version }} - name: Run tests uses: ./.github/actions/test - with: - python_version: ${{ matrix.python_version }} From 6b62dc6271b7c7bd18f00714da3fdceeed9da727 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:13:01 +0100 Subject: [PATCH 054/115] Delete action to clear bad ref in cache --- .github/actions/test/action.yml | 31 --------------------------- .github/workflows/build.yml | 38 +++++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 35 deletions(-) delete mode 100644 .github/actions/test/action.yml diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml deleted file mode 100644 index 92d314ce..00000000 --- a/.github/actions/test/action.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: - Test - -description: - Run pytest, and run the doctest runner (shapefile.py as a script). - - - -runs: - using: "composite" - steps: - # The Repo is required to already be checked out, e.g. by the calling workflow - # The Python to be tested with is required to already be setup, with "python" and "pip" on the system Path - - - name: Doctests - run: python shapefile.py - - - name: Install test dependencies. - run: | - python -m pip install --upgrade pip - pip install -r requirements.test.txt - - - name: Pytest - run: | - pytest - - - name: Show versions for logs. - run: | - python --version - python -m pytest --version - diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 231dbcd1..d29da30a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -40,8 +40,23 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Run tests - uses: ./.github/actions/test + + - name: Doctests + run: python shapefile.py + + - name: Install test dependencies. + run: | + python -m pip install --upgrade pip + pip install -r requirements.test.txt + + - name: Pytest + run: | + pytest + + - name: Show versions for logs. + run: | + python --version + python -m pytest --version test_on_MacOS_and_Windows: @@ -71,5 +86,20 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ inputs.python_version }} - - name: Run tests - uses: ./.github/actions/test + + - name: Doctests + run: python shapefile.py + + - name: Install test dependencies. + run: | + python -m pip install --upgrade pip + pip install -r requirements.test.txt + + - name: Pytest + run: | + pytest + + - name: Show versions for logs. 
+ run: | + python --version + python -m pytest --version From 658f30ca7bc56f4740ce511976a681753e26914e Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:20:12 +0100 Subject: [PATCH 055/115] Re-use and re-instate the Action file --- .github/workflows/actions/test/action.yml | 29 ++++++++++++++++ .github/workflows/build.yml | 40 +++-------------------- 2 files changed, 33 insertions(+), 36 deletions(-) create mode 100644 .github/workflows/actions/test/action.yml diff --git a/.github/workflows/actions/test/action.yml b/.github/workflows/actions/test/action.yml new file mode 100644 index 00000000..d8883bc5 --- /dev/null +++ b/.github/workflows/actions/test/action.yml @@ -0,0 +1,29 @@ +name: + Test + +description: + Run pytest, and run the doctest runner (shapefile.py as a script). + +runs: + using: "composite" + steps: + # The Repo is required to already be checked out, e.g. by the calling workflow + + # The Python to be tested with is required to already be setup, with "python" and "pip" on the system Path + + - name: Doctests + run: python shapefile.py + + - name: Install test dependencies. + run: | + python -m pip install --upgrade pip + pip install -r requirements.test.txt + + - name: Pytest + run: | + pytest + + - name: Show versions for logs. + run: | + python --version + python -m pytest --version \ No newline at end of file diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d29da30a..0d2df3f2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -40,27 +40,11 @@ jobs: steps: - uses: actions/checkout@v4 - - - name: Doctests - run: python shapefile.py - - - name: Install test dependencies. - run: | - python -m pip install --upgrade pip - pip install -r requirements.test.txt - - - name: Pytest - run: | - pytest - - - name: Show versions for logs. - run: | - python --version - python -m pytest --version + - name: Run tests + uses: ./.github/actions/test test_on_MacOS_and_Windows: - strategy: fail-fast: false matrix: @@ -71,14 +55,12 @@ jobs: "macos-latest", "windows-latest", ] - # include: # - os: "windows-latest" # python-version: "3.13" # - os: "macos-latest" # python-version: "3.13" - runs-on: matrix.os steps: - uses: actions/checkout@v4 @@ -87,19 +69,5 @@ jobs: with: python-version: ${{ inputs.python_version }} - - name: Doctests - run: python shapefile.py - - - name: Install test dependencies. - run: | - python -m pip install --upgrade pip - pip install -r requirements.test.txt - - - name: Pytest - run: | - pytest - - - name: Show versions for logs. 
- run: | - python --version - python -m pytest --version + - name: Run tests + uses: ./.github/actions/test From 7b56069e47f69cc48b261e232b25bc1af5248c62 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:22:18 +0100 Subject: [PATCH 056/115] Move Action file to correct location --- .github/{workflows => }/actions/test/action.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{workflows => }/actions/test/action.yml (100%) diff --git a/.github/workflows/actions/test/action.yml b/.github/actions/test/action.yml similarity index 100% rename from .github/workflows/actions/test/action.yml rename to .github/actions/test/action.yml From 4b8821b586ccdaf7207947a6f890ae860c88b135 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:24:46 +0100 Subject: [PATCH 057/115] Specify bash shell --- .github/actions/test/action.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index d8883bc5..89a622fa 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -15,15 +15,18 @@ runs: run: python shapefile.py - name: Install test dependencies. + shell: bash run: | python -m pip install --upgrade pip pip install -r requirements.test.txt - name: Pytest + shell: bash run: | pytest - name: Show versions for logs. + shell: bash run: | python --version python -m pytest --version \ No newline at end of file From 69c54cb59bf21ed78bd59d50195c1b41176d32b5 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:25:45 +0100 Subject: [PATCH 058/115] Specify Bash everywhere --- .github/actions/test/action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 89a622fa..86ec93f3 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -12,6 +12,7 @@ runs: # The Python to be tested with is required to already be setup, with "python" and "pip" on the system Path - name: Doctests + shell: bash run: python shapefile.py - name: Install test dependencies. From 8c9bd567ebbe23c27792157a562ac757cb993e5a Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 16:42:07 +0100 Subject: [PATCH 059/115] Rename build.yml, as it doesn't build anything. Only test on Windows and MacOS in parent repo, due to limited Github hosted runner availability for forks. --- .github/workflows/deploy.yml | 11 +++++++++++ .../workflows/{build.yml => run_tests_and_hooks.yml} | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) rename .github/workflows/{build.yml => run_tests_and_hooks.yml} (93%) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 70db5f72..057d8179 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -23,13 +23,24 @@ jobs: uses: actions/setup-python@v2 with: python-version: '3.x' + + # In general tests should be run after building a distribution, to test that distribution. + # However as long as PyShp is a pure Python library, with pure Python deps (or no deps) + # then it's not crucial. + + # Prevent deployment of releases that fail any hooks (e.g. linting) or that fail any tests. 
+ - name: Run tests and hooks + uses: ./.github/workflows/run_tests_and_hooks.yml + - name: Install dependencies run: | python -m pip install --upgrade pip pip install build - name: Build package run: python -m build + - name: Publish package + if: github.repository == 'GeospatialPython/pyshp' uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 with: user: __token__ diff --git a/.github/workflows/build.yml b/.github/workflows/run_tests_and_hooks.yml similarity index 93% rename from .github/workflows/build.yml rename to .github/workflows/run_tests_and_hooks.yml index 0d2df3f2..438813d1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -1,11 +1,12 @@ # This workflow will run the pre-commit hooks (including linters), and the tests with a variety of Python versions -name: build +name: Run pre-commit hooks and tests on: push: pull_request: branches: [ master ] + workflow_call: workflow_dispatch: jobs: @@ -45,6 +46,7 @@ jobs: test_on_MacOS_and_Windows: + if: github.repository == 'GeospatialPython/pyshp' strategy: fail-fast: false matrix: From 0ff41cf6a9018bfd6583db5b2988ec6979f83009 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 17:45:55 +0100 Subject: [PATCH 060/115] Make Windows tag more specific --- .github/workflows/run_tests_and_hooks.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index 438813d1..f9f36e13 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -54,8 +54,8 @@ jobs: "3.12", ] os: [ - "macos-latest", - "windows-latest", + # "macos-12", + "windows-2022", ] # include: # - os: "windows-latest" From d7462c1646985a65cd4ced17f3c41f8a0d2417e6 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 17:46:57 +0100 Subject: [PATCH 061/115] Test on Ubuntu 24.04 --- .github/workflows/run_tests_and_hooks.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index f9f36e13..c01c3bc1 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -55,6 +55,7 @@ jobs: ] os: [ # "macos-12", + "ubuntu-24.04" "windows-2022", ] # include: From a0c458c053bfb9430d73a96de06c9f480a574dc7 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 17:49:30 +0100 Subject: [PATCH 062/115] Update run_tests_and_hooks.yml --- .github/workflows/run_tests_and_hooks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index c01c3bc1..c64b41d3 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -55,7 +55,7 @@ jobs: ] os: [ # "macos-12", - "ubuntu-24.04" + "ubuntu-24.04", "windows-2022", ] # include: From be71d044b70b6d081c8f1d31b76a5ad876f89e28 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:06:01 +0100 Subject: [PATCH 063/115] Make testing on MacOS, Windows and Ubuntu 24.04 runners optional --- .github/workflows/cross_platform_tests.yml | 35 ++++++++++++++++++++++ .github/workflows/deploy.yml | 27 ++++++++++++----- 
.github/workflows/run_tests_and_hooks.yml | 32 +------------------- 3 files changed, 56 insertions(+), 38 deletions(-) create mode 100644 .github/workflows/cross_platform_tests.yml diff --git a/.github/workflows/cross_platform_tests.yml b/.github/workflows/cross_platform_tests.yml new file mode 100644 index 00000000..6be2b220 --- /dev/null +++ b/.github/workflows/cross_platform_tests.yml @@ -0,0 +1,35 @@ +# This workflow will run the tests on a variety of operating systems and architectures. + +name: Run tests on low availability Github osted runners + +on: + # Optionally run only, until the availability of the required Github hosted + # runners does not slow down CI. + workflow_call: + workflow_dispatch: + +jobs: + run_tests: + if: github.repository == 'GeospatialPython/pyshp' + strategy: + fail-fast: false + matrix: + python-version: [ + "3.12", + ] + os: [ + # "macos-12", + "ubuntu-24.04", + # "windows-2022", + ] + + runs-on: matrix.os + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python_version }} + + - name: Run tests + uses: ./.github/actions/test diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 057d8179..cfda98ef 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -13,25 +13,38 @@ on: types: [published] jobs: - deploy: + test: + # In general, tests should be run after building a distribution, to test that distribution. + # However as long as PyShp is a pure Python library, with pure Python deps (or no deps) + # then this would only test the packaging process, not so much the code as there are + # no binaries. runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.x' - # In general tests should be run after building a distribution, to test that distribution. - # However as long as PyShp is a pure Python library, with pure Python deps (or no deps) - # then it's not crucial. - - # Prevent deployment of releases that fail any hooks (e.g. linting) or that fail any tests. - name: Run tests and hooks uses: ./.github/workflows/run_tests_and_hooks.yml + deploy: + # Prevent deployment of releases that fail any hooks (e.g. linting) or that fail any tests. 
+ needs: test + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + + - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index c64b41d3..c8972afa 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 - test_in_slim_Python_containers: + run_tests_in_containers: strategy: fail-fast: false matrix: @@ -44,33 +44,3 @@ jobs: - name: Run tests uses: ./.github/actions/test - - test_on_MacOS_and_Windows: - if: github.repository == 'GeospatialPython/pyshp' - strategy: - fail-fast: false - matrix: - python-version: [ - "3.12", - ] - os: [ - # "macos-12", - "ubuntu-24.04", - "windows-2022", - ] - # include: - # - os: "windows-latest" - # python-version: "3.13" - # - os: "macos-latest" - # python-version: "3.13" - - runs-on: matrix.os - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python_version }} - - - name: Run tests - uses: ./.github/actions/test From 13c394ec39ffb27e5ebcf8e49cc6c71ceb178791 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:08:30 +0100 Subject: [PATCH 064/115] Trim trailing whitespace (install pre-commit hooks in new clone). --- .github/workflows/cross_platform_tests.yml | 2 +- .github/workflows/deploy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cross_platform_tests.yml b/.github/workflows/cross_platform_tests.yml index 6be2b220..2939a456 100644 --- a/.github/workflows/cross_platform_tests.yml +++ b/.github/workflows/cross_platform_tests.yml @@ -3,7 +3,7 @@ name: Run tests on low availability Github osted runners on: - # Optionally run only, until the availability of the required Github hosted + # Optionally run only, until the availability of the required Github hosted # runners does not slow down CI. workflow_call: workflow_dispatch: diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index cfda98ef..9a1fa30a 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,7 @@ jobs: # In general, tests should be run after building a distribution, to test that distribution. # However as long as PyShp is a pure Python library, with pure Python deps (or no deps) - # then this would only test the packaging process, not so much the code as there are + # then this would only test the packaging process, not so much the code as there are # no binaries. runs-on: ubuntu-latest From 4724d2117d95c682143914d3afb6b2b26147ec0d Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:10:04 +0100 Subject: [PATCH 065/115] Update cross_platform_tests.yml --- .github/workflows/cross_platform_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cross_platform_tests.yml b/.github/workflows/cross_platform_tests.yml index 2939a456..141c9661 100644 --- a/.github/workflows/cross_platform_tests.yml +++ b/.github/workflows/cross_platform_tests.yml @@ -1,6 +1,6 @@ # This workflow will run the tests on a variety of operating systems and architectures. 
-name: Run tests on low availability Github osted runners +name: Run tests on low availability Github hosted runners on: # Optionally run only, until the availability of the required Github hosted From 3eb3fe5154c089844b8ed4434f74a2b451c53fd8 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:14:31 +0100 Subject: [PATCH 066/115] Update cross_platform_tests.yml --- .github/workflows/cross_platform_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cross_platform_tests.yml b/.github/workflows/cross_platform_tests.yml index 141c9661..70398e3b 100644 --- a/.github/workflows/cross_platform_tests.yml +++ b/.github/workflows/cross_platform_tests.yml @@ -23,7 +23,7 @@ jobs: # "windows-2022", ] - runs-on: matrix.os + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 From e179f003b30a1e6d6c8a3dcce04391af7c5fb2fd Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:22:43 +0100 Subject: [PATCH 067/115] Update cross_platform_tests.yml --- .github/workflows/cross_platform_tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cross_platform_tests.yml b/.github/workflows/cross_platform_tests.yml index 70398e3b..2377a4cc 100644 --- a/.github/workflows/cross_platform_tests.yml +++ b/.github/workflows/cross_platform_tests.yml @@ -18,9 +18,9 @@ jobs: "3.12", ] os: [ - # "macos-12", + "macos-12", "ubuntu-24.04", - # "windows-2022", + "windows-2022", ] runs-on: ${{ matrix.os }} From 62e8dd7dfbe074896ec2872b4f0612d8ed70d669 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:42:17 +0100 Subject: [PATCH 068/115] Update cross_platform_tests.yml --- .github/workflows/cross_platform_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cross_platform_tests.yml b/.github/workflows/cross_platform_tests.yml index 2377a4cc..74595c02 100644 --- a/.github/workflows/cross_platform_tests.yml +++ b/.github/workflows/cross_platform_tests.yml @@ -29,7 +29,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ inputs.python_version }} + python-version: ${{ matrix.python_version }} - name: Run tests uses: ./.github/actions/test From 6d7370dba0157e0c4bf3fb7928274f04de07cd10 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:46:15 +0100 Subject: [PATCH 069/115] Update cross_platform_tests.yml --- .github/workflows/cross_platform_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cross_platform_tests.yml b/.github/workflows/cross_platform_tests.yml index 74595c02..85ad466b 100644 --- a/.github/workflows/cross_platform_tests.yml +++ b/.github/workflows/cross_platform_tests.yml @@ -29,7 +29,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python_version }} + python-version: ${{ matrix.python-version }} - name: Run tests uses: ./.github/actions/test From 1faa9aad3762fbc6405b3f2646aa50c5cd40d1d7 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:54:13 +0100 Subject: [PATCH 070/115] Reincorporate cross platform tests --- .github/workflows/cross_platform_tests.yml | 35 ---------------------- .github/workflows/run_tests_and_hooks.yml | 29 ++++++++++++++++++ 2 files changed, 29 
insertions(+), 35 deletions(-) delete mode 100644 .github/workflows/cross_platform_tests.yml diff --git a/.github/workflows/cross_platform_tests.yml b/.github/workflows/cross_platform_tests.yml deleted file mode 100644 index 85ad466b..00000000 --- a/.github/workflows/cross_platform_tests.yml +++ /dev/null @@ -1,35 +0,0 @@ -# This workflow will run the tests on a variety of operating systems and architectures. - -name: Run tests on low availability Github hosted runners - -on: - # Optionally run only, until the availability of the required Github hosted - # runners does not slow down CI. - workflow_call: - workflow_dispatch: - -jobs: - run_tests: - if: github.repository == 'GeospatialPython/pyshp' - strategy: - fail-fast: false - matrix: - python-version: [ - "3.12", - ] - os: [ - "macos-12", - "ubuntu-24.04", - "windows-2022", - ] - - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Run tests - uses: ./.github/actions/test diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index c8972afa..969070d8 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -44,3 +44,32 @@ jobs: - name: Run tests uses: ./.github/actions/test + + run_tests: + strategy: + fail-fast: false + matrix: + python-version: [ + "3.9", + "3.10", + "3.11", + "3.12", + "3.130a2", + + ] + os: [ + "macos-12", + "ubuntu-24.04", + "windows-2022", + ] + + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Run tests + uses: ./.github/actions/test \ No newline at end of file From f075e83ef02eeb0530dde92b4f575b24bc64e98d Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:56:24 +0100 Subject: [PATCH 071/115] Correct Python 3.13 tag --- .github/workflows/run_tests_and_hooks.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index 969070d8..5e16cb78 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -54,8 +54,7 @@ jobs: "3.10", "3.11", "3.12", - "3.130a2", - + "3.13.0a2", ] os: [ "macos-12", From def9580195dfc29b321627ef4fe9357d3bb3d1d4 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 18:59:32 +0100 Subject: [PATCH 072/115] Use Github's required Python tag names --- .github/workflows/run_tests_and_hooks.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index 5e16cb78..b4d56f8c 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -54,7 +54,8 @@ jobs: "3.10", "3.11", "3.12", - "3.13.0a2", + "3.13.0-rc.2", + "3.14.0-alpha.0" ] os: [ "macos-12", From 24b8a73672c9ea8ad369ae044dbb1467149153fb Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 19:06:49 +0100 Subject: [PATCH 073/115] Test on MacOS Arm, and both Ubuntu 22.04 & 24.04 --- .github/workflows/run_tests_and_hooks.yml | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/workflows/run_tests_and_hooks.yml 
b/.github/workflows/run_tests_and_hooks.yml index b4d56f8c..011ce529 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 - run_tests_in_containers: + test_on_old_Pythons: strategy: fail-fast: false matrix: @@ -27,11 +27,6 @@ jobs: "3.6", "3.7", "3.8", - "3.9", - "3.10", - "3.11", - "3.12", - "3.13.0a2", ] runs-on: ubuntu-latest @@ -55,13 +50,17 @@ jobs: "3.11", "3.12", "3.13.0-rc.2", - "3.14.0-alpha.0" ] os: [ - "macos-12", + "macos-latest", + "macos-latest-large", + "ubuntu-latest", "ubuntu-24.04", - "windows-2022", + "windows-latest", ] + include: + - os: ubuntu-24.04 + python-version: "3.14.0-alpha.0" runs-on: ${{ matrix.os }} steps: From 8e06e04ec2b7c642eaabb0d2a22102bdc366c3bd Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 19:09:41 +0100 Subject: [PATCH 074/115] Don't test on macos-latest-large (ARM). Jobs fail for unspecified reasons. --- .github/workflows/run_tests_and_hooks.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index 011ce529..d64480a3 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -53,7 +53,6 @@ jobs: ] os: [ "macos-latest", - "macos-latest-large", "ubuntu-latest", "ubuntu-24.04", "windows-latest", From 9cc86e2a0dbea11233720af1b6c6657aa8aed15a Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Tue, 17 Sep 2024 19:21:03 +0100 Subject: [PATCH 075/115] Test Python 14 alpha on Ubuntu 22.04 --- .github/workflows/run_tests_and_hooks.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_and_hooks.yml index d64480a3..0a24ebae 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_and_hooks.yml @@ -60,6 +60,8 @@ jobs: include: - os: ubuntu-24.04 python-version: "3.14.0-alpha.0" + - os: ubuntu-22.04 + python-version: "3.14.0-alpha.0" runs-on: ${{ matrix.os }} steps: From 916397306aa3c8f59093746cf467c00f1dac0e90 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 18 Sep 2024 19:11:59 +0100 Subject: [PATCH 076/115] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 03827eb3..f6d63328 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,4 @@ dist/ *.egg-info/ *.py[cod] .vscode +.dmypy.json From ffc7c464cb0314222a400cdd920fb4f191de7357 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Wed, 18 Sep 2024 19:17:11 +0100 Subject: [PATCH 077/115] Replace "== True" with "is True" in test_shapefile.py --- test_shapefile.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test_shapefile.py b/test_shapefile.py index ca1e92d8..c9cd972c 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -259,7 +259,7 @@ def test_reader_url(): with shapefile.Reader(url) as sf: for recShape in sf.iterShapeRecords(): pass - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True # test without extension url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries?raw=true" @@ -267,7 
+267,7 @@ def test_reader_url(): for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True # test no files found url = "https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/README.md" @@ -281,7 +281,7 @@ def test_reader_url(): for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True def test_reader_zip(): @@ -293,7 +293,7 @@ def test_reader_zip(): for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True # test require specific path when reading multi-shapefile zipfile with pytest.raises(shapefile.ShapefileException): @@ -305,14 +305,14 @@ def test_reader_zip(): for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True # test specifying the path when reading multi-shapefile zipfile (without extension) with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2") as sf: for recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed == True + assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True # test raising error when can't find shapefile inside zipfile with pytest.raises(shapefile.ShapefileException): @@ -1150,7 +1150,7 @@ def test_write_shp_only(tmpdir): assert writer.shp and not writer.shx and not writer.dbf assert writer.shpNum == 1 assert len(writer) == 1 - assert writer.shp.closed == True + assert writer.shp.closed is True # assert test.shp exists assert os.path.exists(filename+'.shp') @@ -1180,7 +1180,7 @@ def test_write_shp_shx_only(tmpdir): assert writer.shp and writer.shx and not writer.dbf assert writer.shpNum == 1 assert len(writer) == 1 - assert writer.shp.closed == writer.shx.closed == True + assert writer.shp.closed == writer.shx.closed is True # assert test.shp exists assert os.path.exists(filename+'.shp') @@ -1214,7 +1214,7 @@ def test_write_shp_dbf_only(tmpdir): assert writer.shp and not writer.shx and writer.dbf assert writer.shpNum == writer.recNum == 1 assert len(writer) == 1 - assert writer.shp.closed == writer.dbf.closed == True + assert writer.shp.closed == writer.dbf.closed is True # assert test.shp exists assert os.path.exists(filename+'.shp') @@ -1246,7 +1246,7 @@ def test_write_dbf_only(tmpdir): assert not writer.shp and not writer.shx and writer.dbf assert writer.recNum == 1 assert len(writer) == 1 - assert writer.dbf.closed == True + assert writer.dbf.closed is True # assert test.dbf exists assert os.path.exists(filename+'.dbf') From 31798dab61483ed4b06fe129eb070d171bcc3a83 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 2024 14:09:53 +0100 Subject: [PATCH 078/115] Fix some Pylint warnings. Silence W0212 in config using pylint_per_file_ignore. Give credit. Requiring pytest >= 3.7 means pathib2 will always be available. 
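For reference, the `is True` rewrites in this and the preceding patch lean on
Python's comparison chaining: `a == b == c is True` evaluates as
`(a == b) and (b == c) and (c is True)`, testing each adjacent pair. A minimal
sketch of that equivalence:

```
a = b = c = True

chained = a == b == c is True
expanded = (a == b) and (b == c) and (c is True)

# Comparison chains test adjacent pairs, so both forms agree.
assert chained == expanded
```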
Pylint needs the deps installed, of the files it checks Dedupe pip install Setup pylint in CI --- ...ooks.yml => run_tests_hooks_and_tools.yml} | 13 ++++++ README.md | 7 ++- pyproject.toml | 20 +++++++++ requirements.test.txt | 2 +- test_shapefile.py | 44 +++++++++---------- 5 files changed, 60 insertions(+), 26 deletions(-) rename .github/workflows/{run_tests_and_hooks.yml => run_tests_hooks_and_tools.yml} (79%) diff --git a/.github/workflows/run_tests_and_hooks.yml b/.github/workflows/run_tests_hooks_and_tools.yml similarity index 79% rename from .github/workflows/run_tests_and_hooks.yml rename to .github/workflows/run_tests_hooks_and_tools.yml index 0a24ebae..4c393a33 100644 --- a/.github/workflows/run_tests_and_hooks.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -17,6 +17,19 @@ jobs: - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.1 + pylint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: install Pylint and plugin + run: | + python -m pip install --upgrade pip + pip install pytest pylint pylint-per-file-ignores + - name: run Pylint for errors and warnings only, on test_shapefile.py + run: | + pylint --disable=R,C test_shapefile.py + test_on_old_Pythons: strategy: fail-fast: false diff --git a/README.md b/README.md index 8bfc869c..15f9f9b4 100644 --- a/README.md +++ b/README.md @@ -1440,8 +1440,9 @@ In the same folder as README.md and shapefile.py, from the command line run $ python shapefile.py ``` -Linux/Mac and similar platforms will need to run `$ dos2unix README.md` in order -to correct line endings in README.md. +Linux/Mac and similar platforms may need to run `$ dos2unix README.md` in order +to correct line endings in README.md, if Git has not automatically changed them. 
+ # Contributors @@ -1459,10 +1460,12 @@ fiveham geospatialpython Hannes Ignacio Martinez Vazquez +James Parrott Jason Moujaes Jonty Wareing Karim Bahgat karanrn +Kurt Schwehr Kyle Kelley Louis Tiao Marcin Cuprjak diff --git a/pyproject.toml b/pyproject.toml index fed528d4..e4a82a1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,23 @@ [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" + +[tool.pylint.MASTER] +load-plugins=[ + "pylint_per_file_ignores", +] + +[tool.pylint.'MESSAGES CONTROL'] +# Silence warning: shapefile.py:2076:20: W0212: Access to a protected +# member _from_geojson of a client class (protected-access) +# +# Silence warnings: test_shapefile.py:{783,786,799,803,06,1195}:19: +# W0212: Access to a protected member _offsets of a +# client class (protected-access) +# +# Toml multi-line string used instead of array due to: +# https://github.com/christopherpickering/pylint-per-file-ignores/issues/160 +per-file-ignores = """ + shapefile.py:W0212 + test_shapefile.py:W0212 +""" \ No newline at end of file diff --git a/requirements.test.txt b/requirements.test.txt index 27472efe..11141738 100644 --- a/requirements.test.txt +++ b/requirements.test.txt @@ -1,2 +1,2 @@ -pytest +pytest >= 3.7 setuptools diff --git a/test_shapefile.py b/test_shapefile.py index c9cd972c..f5dd7187 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -5,18 +5,16 @@ import datetime import json import os.path -import sys -if sys.version_info.major == 3: +try: from pathlib import Path +except ImportError: + # pathlib2 is a dependency of pytest >= 3.7 + from pathlib2 import Path # third party imports import pytest -if sys.version_info.major == 2: - # required by pytest for python <36 - from pathlib2 import Path - # our imports import shapefile @@ -208,7 +206,7 @@ def test_empty_shape_geo_interface(): """ shape = shapefile.Shape() with pytest.raises(Exception): - shape.__geo_interface__ + getattr(shape, '__geo_interface__') @pytest.mark.parametrize("typ,points,parts,expected", geo_interface_tests) def test_expected_shape_geo_interface(typ, points, parts, expected): @@ -257,17 +255,17 @@ def test_reader_url(): # test with extension url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true" with shapefile.Reader(url) as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test without extension url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries?raw=true" with shapefile.Reader(url) as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test no files found url = "https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/README.md" @@ -278,10 +276,10 @@ def test_reader_url(): # test reading zipfile from url url = "https://github.com/JamesParrott/PyShp_test_shapefile/raw/main/gis_osm_natural_a_free_1.zip" with shapefile.Reader(url) as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True def 
test_reader_zip(): @@ -290,10 +288,10 @@ def test_reader_zip(): """ # test reading zipfile only with shapefile.Reader("shapefiles/blockgroups.zip") as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test require specific path when reading multi-shapefile zipfile with pytest.raises(shapefile.ShapefileException): @@ -302,17 +300,17 @@ def test_reader_zip(): # test specifying the path when reading multi-shapefile zipfile (with extension) with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp") as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test specifying the path when reading multi-shapefile zipfile (without extension) with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2") as sf: - for recShape in sf.iterShapeRecords(): + for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 - assert sf.shp.closed == sf.shx.closed == sf.dbf.closed is True + assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test raising error when can't find shapefile inside zipfile with pytest.raises(shapefile.ShapefileException): @@ -783,7 +781,7 @@ def test_reader_offsets(): # shx offsets should not be read during loading assert not sf._offsets # reading a shape index should trigger reading offsets from shx file - shape = sf.shape(3) + sf.shape(3) assert len(sf._offsets) == len(sf.shapes()) @@ -800,7 +798,7 @@ def test_reader_offsets_no_shx(): assert not sf._offsets # reading a shape index should iterate to the shape # but the list of offsets should remain empty - shape = sf.shape(3) + sf.shape(3) assert not sf._offsets # reading all the shapes should build the list of offsets shapes = sf.shapes() @@ -1180,7 +1178,7 @@ def test_write_shp_shx_only(tmpdir): assert writer.shp and writer.shx and not writer.dbf assert writer.shpNum == 1 assert len(writer) == 1 - assert writer.shp.closed == writer.shx.closed is True + assert writer.shp.closed is writer.shx.closed is True # assert test.shp exists assert os.path.exists(filename+'.shp') @@ -1214,7 +1212,7 @@ def test_write_shp_dbf_only(tmpdir): assert writer.shp and not writer.shx and writer.dbf assert writer.shpNum == writer.recNum == 1 assert len(writer) == 1 - assert writer.shp.closed == writer.dbf.closed is True + assert writer.shp.closed is writer.dbf.closed is True # assert test.shp exists assert os.path.exists(filename+'.shp') From 98961bb2376134f1c2ec0ad49ed84db3ea9cdc04 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:36:47 +0100 Subject: [PATCH 079/115] Add Ruff-format to pre-commit hooks --- .pre-commit-config.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 18f0b5dd..f065f594 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,4 +9,7 @@ repos: hooks: - id: isort name: isort (python) - +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.4 + hooks: + - id: ruff-format From cd24957021959f48b517c75cc03e2bf813cc725a Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 
2024 16:43:46 +0100 Subject: [PATCH 080/115] Add settings for Ruff and run ruff format --- pyproject.toml | 66 +++ shapefile.py | 1029 ++++++++++++++++++++++++++++----------------- test_shapefile.py | 904 +++++++++++++++++++++++++-------------- 3 files changed, 1283 insertions(+), 716 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e4a82a1f..8cdddf81 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,72 @@ requires = ["setuptools"] build-backend = "setuptools.build_meta" + +[tool.ruff] +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] + +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.9 +target-version = "py39" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +select = ["E4", "E7", "E9", "F"] +ignore = [] + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + + + [tool.pylint.MASTER] load-plugins=[ "pylint_per_file_ignores", diff --git a/shapefile.py b/shapefile.py index b218103f..22c217e8 100644 --- a/shapefile.py +++ b/shapefile.py @@ -42,20 +42,21 @@ MULTIPATCH = 31 SHAPETYPE_LOOKUP = { - 0: 'NULL', - 1: 'POINT', - 3: 'POLYLINE', - 5: 'POLYGON', - 8: 'MULTIPOINT', - 11: 'POINTZ', - 13: 'POLYLINEZ', - 15: 'POLYGONZ', - 18: 'MULTIPOINTZ', - 21: 'POINTM', - 23: 'POLYLINEM', - 25: 'POLYGONM', - 28: 'MULTIPOINTM', - 31: 'MULTIPATCH'} + 0: "NULL", + 1: "POINT", + 3: "POLYLINE", + 5: "POLYGON", + 8: "MULTIPOINT", + 11: "POINTZ", + 13: "POLYLINEZ", + 15: "POLYGONZ", + 18: "MULTIPOINTZ", + 21: "POINTM", + 23: "POLYLINEM", + 25: "POLYGONM", + 28: "MULTIPOINTM", + 31: "MULTIPATCH", +} TRIANGLE_STRIP = 0 TRIANGLE_FAN = 1 @@ -65,12 +66,13 @@ RING = 5 PARTTYPE_LOOKUP = { - 0: 'TRIANGLE_STRIP', - 1: 'TRIANGLE_FAN', - 2: 'OUTER_RING', - 3: 'INNER_RING', - 4: 'FIRST_RING', - 5: 'RING'} + 0: "TRIANGLE_STRIP", + 1: "TRIANGLE_FAN", + 2: "OUTER_RING", + 3: "INNER_RING", + 4: "FIRST_RING", + 5: "RING", +} # Python 2-3 handling @@ -94,11 +96,12 @@ # Helpers -MISSING = [None,''] -NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. +MISSING = [None, ""] +NODATA = -10e38 # as per the ESRI shapefile spec, only used for m-values. if PYTHON3: - def b(v, encoding='utf-8', encodingErrors='strict'): + + def b(v, encoding="utf-8", encodingErrors="strict"): if isinstance(v, str): # For python 3 encode str to bytes. return v.encode(encoding, encodingErrors) @@ -112,7 +115,7 @@ def b(v, encoding='utf-8', encodingErrors='strict'): # Force string representation. 
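             # (Anything that is neither str nor bytes, e.g. an int or a
             # datetime.date, is first converted with str() so that the
             # encode call below always receives text.)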
return str(v).encode(encoding, encodingErrors) - def u(v, encoding='utf-8', encodingErrors='strict'): + def u(v, encoding="utf-8", encodingErrors="strict"): if isinstance(v, bytes): # For python 3 decode bytes to str. return v.decode(encoding, encodingErrors) @@ -130,7 +133,8 @@ def is_string(v): return isinstance(v, str) else: - def b(v, encoding='utf-8', encodingErrors='strict'): + + def b(v, encoding="utf-8", encodingErrors="strict"): if isinstance(v, unicode): # For python 2 encode unicode to bytes. return v.encode(encoding, encodingErrors) @@ -144,7 +148,7 @@ def b(v, encoding='utf-8', encodingErrors='strict'): # Force string representation. return unicode(v).encode(encoding, encodingErrors) - def u(v, encoding='utf-8', encodingErrors='strict'): + def u(v, encoding="utf-8", encodingErrors="strict"): if isinstance(v, bytes): # For python 2 decode bytes to unicode. return v.decode(encoding, encodingErrors) @@ -153,7 +157,7 @@ def u(v, encoding='utf-8', encodingErrors='strict'): return v elif v is None: # Since we're dealing with text, interpret None as "" - return u"" + return "" else: # Force string representation. return bytes(v).decode(encoding, encodingErrors) @@ -161,13 +165,16 @@ def u(v, encoding='utf-8', encodingErrors='strict'): def is_string(v): return isinstance(v, basestring) + if sys.version_info[0:2] >= (3, 6): + def pathlike_obj(path): if isinstance(path, os.PathLike): return os.fsdecode(path) else: return path else: + def pathlike_obj(path): if is_string(path): return path @@ -182,27 +189,31 @@ def pathlike_obj(path): # Begin + class _Array(array.array): """Converts python tuples to lists of the appropriate type. Used to unpack different shapefile header parts.""" + def __repr__(self): return str(self.tolist()) + def signed_area(coords, fast=False): """Return the signed area enclosed by a ring using the linear time algorithm. A value >= 0 indicates a counter-clockwise oriented ring. A faster version is possible by setting 'fast' to True, which returns 2x the area, e.g. if you're only interested in the sign of the area. """ - xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values + xs, ys = map(list, list(zip(*coords))[:2]) # ignore any z or m values xs.append(xs[1]) ys.append(ys[1]) - area2 = sum(xs[i]*(ys[i+1]-ys[i-1]) for i in range(1, len(coords))) + area2 = sum(xs[i] * (ys[i + 1] - ys[i - 1]) for i in range(1, len(coords))) if fast: return area2 else: return area2 / 2.0 + def is_cw(coords): """Returns True if a polygon ring has clockwise orientation, determined by a negatively signed area. @@ -210,34 +221,35 @@ def is_cw(coords): area2 = signed_area(coords, fast=True) return area2 < 0 + def rewind(coords): - """Returns the input coords in reversed order. - """ + """Returns the input coords in reversed order.""" return list(reversed(coords)) + def ring_bbox(coords): - """Calculates and returns the bounding box of a ring. 
- """ - xs,ys = zip(*coords) - bbox = min(xs),min(ys),max(xs),max(ys) + """Calculates and returns the bounding box of a ring.""" + xs, ys = zip(*coords) + bbox = min(xs), min(ys), max(xs), max(ys) return bbox + def bbox_overlap(bbox1, bbox2): - """Tests whether two bounding boxes overlap, returning a boolean - """ - xmin1,ymin1,xmax1,ymax1 = bbox1 - xmin2,ymin2,xmax2,ymax2 = bbox2 - overlap = (xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2) + """Tests whether two bounding boxes overlap, returning a boolean""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + overlap = xmin1 <= xmax2 and xmax1 >= xmin2 and ymin1 <= ymax2 and ymax1 >= ymin2 return overlap + def bbox_contains(bbox1, bbox2): - """Tests whether bbox1 fully contains bbox2, returning a boolean - """ - xmin1,ymin1,xmax1,ymax1 = bbox1 - xmin2,ymin2,xmax2,ymax2 = bbox2 - contains = (xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2) + """Tests whether bbox1 fully contains bbox2, returning a boolean""" + xmin1, ymin1, xmax1, ymax1 = bbox1 + xmin2, ymin2, xmax2, ymax2 = bbox2 + contains = xmin1 < xmin2 and xmax1 > xmax2 and ymin1 < ymin2 and ymax1 > ymax2 return contains + def ring_contains_point(coords, p): """Fast point-in-polygon crossings algorithm, MacMartin optimization. @@ -249,29 +261,31 @@ def ring_contains_point(coords, p): compare vertex Y values to the testing point's Y and quickly discard edges which are entirely to one side of the test ray. """ - tx,ty = p + tx, ty = p # get initial test bit for above/below X axis vtx0 = coords[0] - yflag0 = ( vtx0[1] >= ty ) + yflag0 = vtx0[1] >= ty inside_flag = False for vtx1 in coords[1:]: - yflag1 = ( vtx1[1] >= ty ) + yflag1 = vtx1[1] >= ty # check if endpoints straddle (are on opposite sides) of X axis # (i.e. the Y's differ); if so, +X ray could intersect this edge. if yflag0 != yflag1: - xflag0 = ( vtx0[0] >= tx ) + xflag0 = vtx0[0] >= tx # check if endpoints are on same side of the Y axis (i.e. X's # are the same); if so, it's easy to test if edge hits or misses. - if xflag0 == ( vtx1[0] >= tx ): + if xflag0 == (vtx1[0] >= tx): # if edge's X values both right of the point, must hit if xflag0: inside_flag = not inside_flag else: # compute intersection of pgon segment with +X ray, note # if >= point's X; if so, the ray hits it. - if ( vtx1[0] - (vtx1[1]-ty) * ( vtx0[0]-vtx1[0]) / (vtx0[1]-vtx1[1]) ) >= tx: + if ( + vtx1[0] - (vtx1[1] - ty) * (vtx0[0] - vtx1[0]) / (vtx0[1] - vtx1[1]) + ) >= tx: inside_flag = not inside_flag # move to next pair of vertices, retaining info as possible @@ -280,6 +294,7 @@ def ring_contains_point(coords, p): return inside_flag + def ring_sample(coords, ccw=False): """Return a sample point guaranteed to be within a ring, by efficiently finding the first centroid of a coordinate triplet whose orientation @@ -288,6 +303,7 @@ def ring_sample(coords, ccw=False): (counter-clockwise) is set to True. 
""" triplet = [] + def itercoords(): # iterate full closed ring for p in coords: @@ -303,7 +319,9 @@ def itercoords(): # new triplet, try to get sample if len(triplet) == 3: # check that triplet does not form a straight line (not a triangle) - is_straight_line = (triplet[0][1] - triplet[1][1]) * (triplet[0][0] - triplet[2][0]) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) + is_straight_line = (triplet[0][1] - triplet[1][1]) * ( + triplet[0][0] - triplet[2][0] + ) == (triplet[0][1] - triplet[2][1]) * (triplet[0][0] - triplet[1][0]) if not is_straight_line: # get triplet orientation closed_triplet = triplet + [triplet[0]] @@ -311,26 +329,27 @@ def itercoords(): # check that triplet has the same orientation as the ring (means triangle is inside the ring) if ccw == triplet_ccw: # get triplet centroid - xs,ys = zip(*triplet) - xmean,ymean = sum(xs) / 3.0, sum(ys) / 3.0 + xs, ys = zip(*triplet) + xmean, ymean = sum(xs) / 3.0, sum(ys) / 3.0 # check that triplet centroid is truly inside the ring - if ring_contains_point(coords, (xmean,ymean)): - return xmean,ymean + if ring_contains_point(coords, (xmean, ymean)): + return xmean, ymean # failed to get sample point from this triplet # remove oldest triplet coord to allow iterating to next triplet triplet.pop(0) else: - raise Exception('Unexpected error: Unable to find a ring sample point.') + raise Exception("Unexpected error: Unable to find a ring sample point.") + def ring_contains_ring(coords1, coords2): - '''Returns True if all vertexes in coords2 are fully inside coords1. - ''' + """Returns True if all vertexes in coords2 are fully inside coords1.""" return all((ring_contains_point(coords1, p2) for p2 in coords2)) + def organize_polygon_rings(rings, return_errors=None): - '''Organize a list of coordinate rings into one or more polygons with holes. + """Organize a list of coordinate rings into one or more polygons with holes. Returns a list of polygons, where each polygon is composed of a single exterior ring, and one or more interior holes. If a return_errors dict is provided (optional), any errors encountered will be added to it. @@ -340,7 +359,7 @@ def organize_polygon_rings(rings, return_errors=None): holes if they run in counter-clockwise direction. This method is used to construct GeoJSON (multi)polygons from the shapefile polygon shape type, which does not explicitly store the structure of the polygons beyond exterior/interior ring orientation. 
- ''' + """ # first iterate rings and classify as exterior or hole exteriors = [] holes = [] @@ -374,17 +393,16 @@ def organize_polygon_rings(rings, return_errors=None): return polys # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = dict([(hole_i,[]) for hole_i in xrange(len(holes))]) + hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) exterior_bboxes = [ring_bbox(ring) for ring in exteriors] for hole_i in hole_exteriors.keys(): hole_bbox = ring_bbox(holes[hole_i]) - for ext_i,ext_bbox in enumerate(exterior_bboxes): + for ext_i, ext_bbox in enumerate(exterior_bboxes): if bbox_contains(ext_bbox, hole_bbox): - hole_exteriors[hole_i].append( ext_i ) + hole_exteriors[hole_i].append(ext_i) # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test - for hole_i,exterior_candidates in hole_exteriors.items(): - + for hole_i, exterior_candidates in hole_exteriors.items(): if len(exterior_candidates) > 1: # get hole sample point ccw = not is_cw(holes[hole_i]) @@ -393,7 +411,9 @@ def organize_polygon_rings(rings, return_errors=None): new_exterior_candidates = [] for ext_i in exterior_candidates: # check that hole sample point is inside exterior - hole_in_exterior = ring_contains_point(exteriors[ext_i], hole_sample) + hole_in_exterior = ring_contains_point( + exteriors[ext_i], hole_sample + ) if hole_in_exterior: new_exterior_candidates.append(ext_i) @@ -401,31 +421,33 @@ def organize_polygon_rings(rings, return_errors=None): hole_exteriors[hole_i] = new_exterior_candidates # if still holes with more than one possible exterior, means we have an exterior hole nested inside another exterior's hole - for hole_i,exterior_candidates in hole_exteriors.items(): - + for hole_i, exterior_candidates in hole_exteriors.items(): if len(exterior_candidates) > 1: # exterior candidate with the smallest area is the hole's most immediate parent - ext_i = sorted(exterior_candidates, key=lambda x: abs(signed_area(exteriors[x], fast=True)))[0] + ext_i = sorted( + exterior_candidates, + key=lambda x: abs(signed_area(exteriors[x], fast=True)), + )[0] hole_exteriors[hole_i] = [ext_i] # separate out holes that are orphaned (not contained by any exterior) orphan_holes = [] - for hole_i,exterior_candidates in list(hole_exteriors.items()): + for hole_i, exterior_candidates in list(hole_exteriors.items()): if not exterior_candidates: - orphan_holes.append( hole_i ) + orphan_holes.append(hole_i) del hole_exteriors[hole_i] continue # each hole should now only belong to one exterior, group into exterior-holes polygons polys = [] - for ext_i,ext in enumerate(exteriors): + for ext_i, ext in enumerate(exteriors): poly = [ext] # find relevant holes poly_holes = [] - for hole_i,exterior_candidates in list(hole_exteriors.items()): + for hole_i, exterior_candidates in list(hole_exteriors.items()): # hole is relevant if previously matched with this exterior if exterior_candidates[0] == ext_i: - poly_holes.append( holes[hole_i] ) + poly_holes.append(holes[hole_i]) poly += poly_holes polys.append(poly) @@ -437,21 +459,24 @@ def organize_polygon_rings(rings, return_errors=None): polys.append(poly) if orphan_holes and return_errors is not None: - return_errors['polygon_orphaned_holes'] = len(orphan_holes) + return_errors["polygon_orphaned_holes"] = len(orphan_holes) return polys # no exteriors, be nice and assume due to incorrect winding order else: if return_errors is not None: - return_errors['polygon_only_holes'] = len(holes) + 
return_errors["polygon_only_holes"] = len(holes) exteriors = holes # add as single exterior without any holes polys = [[ext] for ext in exteriors] return polys + class Shape(object): - def __init__(self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None): + def __init__( + self, shapeType=NULL, points=None, parts=None, partTypes=None, oid=None + ): """Stores the geometry of the different shape types specified in the Shapefile spec. Shape types are usually point, polyline, or polygons. Every shape type @@ -486,35 +511,32 @@ def __geo_interface__(self): # the shape has no coordinate information, i.e. is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'Point', 'coordinates':tuple()} + return {"type": "Point", "coordinates": tuple()} else: - return { - 'type': 'Point', - 'coordinates': tuple(self.points[0]) - } + return {"type": "Point", "coordinates": tuple(self.points[0])} elif self.shapeType in [MULTIPOINT, MULTIPOINTM, MULTIPOINTZ]: if len(self.points) == 0: # the shape has no coordinate information, i.e. is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'MultiPoint', 'coordinates':[]} + return {"type": "MultiPoint", "coordinates": []} else: # multipoint return { - 'type': 'MultiPoint', - 'coordinates': [tuple(p) for p in self.points] + "type": "MultiPoint", + "coordinates": [tuple(p) for p in self.points], } elif self.shapeType in [POLYLINE, POLYLINEM, POLYLINEZ]: if len(self.parts) == 0: # the shape has no coordinate information, i.e. is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'LineString', 'coordinates':[]} + return {"type": "LineString", "coordinates": []} elif len(self.parts) == 1: # linestring return { - 'type': 'LineString', - 'coordinates': [tuple(p) for p in self.points] + "type": "LineString", + "coordinates": [tuple(p) for p in self.points], } else: # multilinestring @@ -529,16 +551,13 @@ def __geo_interface__(self): ps = part else: coordinates.append([tuple(p) for p in self.points[part:]]) - return { - 'type': 'MultiLineString', - 'coordinates': coordinates - } + return {"type": "MultiLineString", "coordinates": coordinates} elif self.shapeType in [POLYGON, POLYGONM, POLYGONZ]: if len(self.parts) == 0: # the shape has no coordinate information, i.e. 
is 'empty' # the geojson spec does not define a proper null-geometry type # however, it does allow geometry types with 'empty' coordinates to be interpreted as null-geometries - return {'type':'Polygon', 'coordinates':[]} + return {"type": "Polygon", "coordinates": []} else: # get all polygon rings rings = [] @@ -546,7 +565,7 @@ def __geo_interface__(self): # get indexes of start and end points of the ring start = self.parts[i] try: - end = self.parts[i+1] + end = self.parts[i + 1] except IndexError: end = len(self.points) @@ -561,35 +580,40 @@ def __geo_interface__(self): # if VERBOSE is True, issue detailed warning about any shape errors # encountered during the Shapefile to GeoJSON conversion if VERBOSE and self._errors: - header = 'Possible issue encountered when converting Shape #{} to GeoJSON: '.format(self.oid) - orphans = self._errors.get('polygon_orphaned_holes', None) + header = "Possible issue encountered when converting Shape #{} to GeoJSON: ".format( + self.oid + ) + orphans = self._errors.get("polygon_orphaned_holes", None) if orphans: - msg = header + 'Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ + msg = ( + header + + "Shapefile format requires that all polygon interior holes be contained by an exterior ring, \ but the Shape contained interior holes (defined by counter-clockwise orientation in the shapefile format) that were \ orphaned, i.e. not contained by any exterior rings. The rings were still included but were \ -encoded as GeoJSON exterior rings instead of holes.' +encoded as GeoJSON exterior rings instead of holes." + ) logger.warning(msg) - only_holes = self._errors.get('polygon_only_holes', None) + only_holes = self._errors.get("polygon_only_holes", None) if only_holes: - msg = header + 'Shapefile format requires that polygons contain at least one exterior ring, \ + msg = ( + header + + "Shapefile format requires that polygons contain at least one exterior ring, \ but the Shape was entirely made up of interior holes (defined by counter-clockwise orientation in the shapefile format). The rings were \ -still included but were encoded as GeoJSON exterior rings instead of holes.' +still included but were encoded as GeoJSON exterior rings instead of holes." + ) logger.warning(msg) # return as geojson if len(polys) == 1: - return { - 'type': 'Polygon', - 'coordinates': polys[0] - } + return {"type": "Polygon", "coordinates": polys[0]} else: - return { - 'type': 'MultiPolygon', - 'coordinates': polys - } + return {"type": "MultiPolygon", "coordinates": polys} else: - raise Exception('Shape type "%s" cannot be represented as GeoJSON.' % SHAPETYPE_LOOKUP[self.shapeType]) + raise Exception( + 'Shape type "%s" cannot be represented as GeoJSON.' + % SHAPETYPE_LOOKUP[self.shapeType] + ) @staticmethod def _from_geojson(geoj): @@ -617,16 +641,16 @@ def _from_geojson(geoj): # set points and parts if geojType == "Point": - shape.points = [ geoj["coordinates"] ] + shape.points = [geoj["coordinates"]] shape.parts = [0] - elif geojType in ("MultiPoint","LineString"): + elif geojType in ("MultiPoint", "LineString"): shape.points = geoj["coordinates"] shape.parts = [0] elif geojType in ("Polygon"): points = [] parts = [] index = 0 - for i,ext_or_hole in enumerate(geoj["coordinates"]): + for i, ext_or_hole in enumerate(geoj["coordinates"]): # although the latest GeoJSON spec states that exterior rings should have # counter-clockwise orientation, we explicitly check orientation since older # GeoJSONs might not enforce this. 
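
The winding-order convention this conversion code relies on is easy to see end to end. The snippet below is an illustration only, not part of the patch; it assumes the module-level helpers is_cw, rewind and organize_polygon_rings reformatted in the hunks above, and the square rings are made up:

    import shapefile

    # Shapefile convention: exterior rings wind clockwise (negative signed
    # area), interior holes wind counter-clockwise (positive signed area).
    exterior = [(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)]  # clockwise
    hole = [(2, 2), (6, 2), (6, 6), (2, 6), (2, 2)]          # counter-clockwise
    assert shapefile.is_cw(exterior) and not shapefile.is_cw(hole)

    # A GeoJSON exterior arrives counter-clockwise, which is why the code
    # above checks orientation and flips such rings with rewind().
    assert shapefile.is_cw(shapefile.rewind(hole))

    # Reading back, rings are regrouped into polygons purely by winding:
    # a single clockwise exterior here, so the lone hole is attached to it.
    assert shapefile.organize_polygon_rings([exterior, hole]) == [[exterior, hole]]
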
@@ -656,7 +680,7 @@ def _from_geojson(geoj): parts = [] index = 0 for polygon in geoj["coordinates"]: - for i,ext_or_hole in enumerate(polygon): + for i, ext_or_hole in enumerate(polygon): # although the latest GeoJSON spec states that exterior rings should have # counter-clockwise orientation, we explicitly check orientation since older # GeoJSONs might not enforce this. @@ -683,7 +707,8 @@ def shapeTypeName(self): return SHAPETYPE_LOOKUP[self.shapeType] def __repr__(self): - return 'Shape #{}: {}'.format(self.__oid, self.shapeTypeName) + return "Shape #{}: {}".format(self.__oid, self.shapeTypeName) + class _Record(list): """ @@ -728,14 +753,16 @@ def __getattr__(self, item): corresponding value in the Record does not exist """ try: - if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() - raise AttributeError('_Record does not implement __setstate__') + if item == "__setstate__": # Prevent infinite loop from copy.deepcopy() + raise AttributeError("_Record does not implement __setstate__") index = self.__field_positions[item] return list.__getitem__(self, index) except KeyError: - raise AttributeError('{} is not a field name'.format(item)) + raise AttributeError("{} is not a field name".format(item)) except IndexError: - raise IndexError('{} found as a field but not enough values available.'.format(item)) + raise IndexError( + "{} found as a field but not enough values available.".format(item) + ) def __setattr__(self, key, value): """ @@ -745,13 +772,13 @@ def __setattr__(self, key, value): :return: None :raises: AttributeError, if key is not a field of the shapefile """ - if key.startswith('_'): # Prevent infinite loop when setting mangled attribute + if key.startswith("_"): # Prevent infinite loop when setting mangled attribute return list.__setattr__(self, key, value) try: index = self.__field_positions[key] return list.__setitem__(self, index, value) except KeyError: - raise AttributeError('{} is not a field name'.format(key)) + raise AttributeError("{} is not a field name".format(key)) def __getitem__(self, item): """ @@ -790,7 +817,7 @@ def __setitem__(self, key, value): if index is not None: return list.__setitem__(self, index, value) else: - raise IndexError('{} is not a field name and not an int'.format(key)) + raise IndexError("{} is not a field name and not an int".format(key)) @property def oid(self): @@ -804,13 +831,13 @@ def as_dict(self, date_strings=False): """ dct = dict((f, self[i]) for f, i in self.__field_positions.items()) if date_strings: - for k,v in dct.items(): + for k, v in dct.items(): if isinstance(v, date): - dct[k] = '{:04d}{:02d}{:02d}'.format(v.year, v.month, v.day) + dct[k] = "{:04d}{:02d}{:02d}".format(v.year, v.month, v.day) return dct def __repr__(self): - return 'Record #{}: {}'.format(self.__oid, list(self)) + return "Record #{}: {}".format(self.__oid, list(self)) def __dir__(self): """ @@ -819,22 +846,33 @@ def __dir__(self): :return: List of method names and fields """ - default = list(dir(type(self))) # default list methods and attributes of this class - fnames = list(self.__field_positions.keys()) # plus field names (random order if Python version < 3.6) + default = list( + dir(type(self)) + ) # default list methods and attributes of this class + fnames = list( + self.__field_positions.keys() + ) # plus field names (random order if Python version < 3.6) return default + fnames + class ShapeRecord(object): """A ShapeRecord object containing a shape along with its attributes. 
Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" + def __init__(self, shape=None, record=None): self.shape = shape self.record = record @property def __geo_interface__(self): - return {'type': 'Feature', - 'properties': self.record.as_dict(date_strings=True), - 'geometry': None if self.shape.shapeType == NULL else self.shape.__geo_interface__} + return { + "type": "Feature", + "properties": self.record.as_dict(date_strings=True), + "geometry": None + if self.shape.shapeType == NULL + else self.shape.__geo_interface__, + } + class Shapes(list): """A class to hold a list of Shape objects. Subclasses list to ensure compatibility with @@ -843,16 +881,19 @@ class Shapes(list): to return a GeometryCollection dictionary.""" def __repr__(self): - return 'Shapes: {}'.format(list(self)) + return "Shapes: {}".format(list(self)) @property def __geo_interface__(self): # Note: currently this will fail if any of the shapes are null-geometries # could be fixed by storing the shapefile shapeType upon init, returning geojson type with empty coords - collection = {'type': 'GeometryCollection', - 'geometries': [shape.__geo_interface__ for shape in self]} + collection = { + "type": "GeometryCollection", + "geometries": [shape.__geo_interface__ for shape in self], + } return collection + class ShapeRecords(list): """A class to hold a list of ShapeRecord objects. Subclasses list to ensure compatibility with former work and to reuse all the optimizations of the builtin list. @@ -860,18 +901,23 @@ class ShapeRecords(list): to return a FeatureCollection dictionary.""" def __repr__(self): - return 'ShapeRecords: {}'.format(list(self)) + return "ShapeRecords: {}".format(list(self)) @property def __geo_interface__(self): - collection = {'type': 'FeatureCollection', - 'features': [shaperec.__geo_interface__ for shaperec in self]} + collection = { + "type": "FeatureCollection", + "features": [shaperec.__geo_interface__ for shaperec in self], + } return collection + class ShapefileException(Exception): """An exception to handle shapefile specific problems.""" + pass + class Reader(object): """Reads the three files of a shapefile as a unit or separately. If one of the three files (.shp, .shx, @@ -892,6 +938,7 @@ class Reader(object): efficiently as possible. Shapefiles are usually not large but they can be. 
""" + def __init__(self, *args, **kwargs): self.shp = None self.shx = None @@ -905,61 +952,81 @@ def __init__(self, *args, **kwargs): self.fields = [] self.__dbfHdrLength = 0 self.__fieldLookup = {} - self.encoding = kwargs.pop('encoding', 'utf-8') - self.encodingErrors = kwargs.pop('encodingErrors', 'strict') + self.encoding = kwargs.pop("encoding", "utf-8") + self.encodingErrors = kwargs.pop("encodingErrors", "strict") # See if a shapefile name was passed as the first argument if len(args) > 0: path = pathlike_obj(args[0]) if is_string(path): - - if '.zip' in path: + if ".zip" in path: # Shapefile is inside a zipfile - if path.count('.zip') > 1: + if path.count(".zip") > 1: # Multiple nested zipfiles - raise ShapefileException('Reading from multiple nested zipfiles is not supported: %s' % path) + raise ShapefileException( + "Reading from multiple nested zipfiles is not supported: %s" + % path + ) # Split into zipfile and shapefile paths - if path.endswith('.zip'): + if path.endswith(".zip"): zpath = path shapefile = None else: - zpath = path[:path.find('.zip')+4] - shapefile = path[path.find('.zip')+4+1:] + zpath = path[: path.find(".zip") + 4] + shapefile = path[path.find(".zip") + 4 + 1 :] # Create a zip file handle - if zpath.startswith('http'): + if zpath.startswith("http"): # Zipfile is from a url # Download to a temporary url and treat as normal zipfile - req = Request(zpath, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) + req = Request( + zpath, + headers={ + "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" + }, + ) resp = urlopen(req) # write zipfile data to a read+write tempfile and use as source, gets deleted when garbage collected - zipfileobj = tempfile.NamedTemporaryFile(mode='w+b', suffix='.zip', delete=True) + zipfileobj = tempfile.NamedTemporaryFile( + mode="w+b", suffix=".zip", delete=True + ) zipfileobj.write(resp.read()) zipfileobj.seek(0) else: # Zipfile is from a file - zipfileobj = open(zpath, mode='rb') + zipfileobj = open(zpath, mode="rb") # Open the zipfile archive - with zipfile.ZipFile(zipfileobj, 'r') as archive: + with zipfile.ZipFile(zipfileobj, "r") as archive: if not shapefile: # Only the zipfile path is given # Inspect zipfile contents to find the full shapefile path - shapefiles = [name - for name in archive.namelist() - if (name.endswith('.SHP') or name.endswith('.shp'))] + shapefiles = [ + name + for name in archive.namelist() + if (name.endswith(".SHP") or name.endswith(".shp")) + ] # The zipfile must contain exactly one shapefile if len(shapefiles) == 0: - raise ShapefileException('Zipfile does not contain any shapefiles') + raise ShapefileException( + "Zipfile does not contain any shapefiles" + ) elif len(shapefiles) == 1: shapefile = shapefiles[0] else: - raise ShapefileException('Zipfile contains more than one shapefile: %s. Please specify the full \ - path to the shapefile you would like to open.' % shapefiles ) + raise ShapefileException( + "Zipfile contains more than one shapefile: %s. Please specify the full \ + path to the shapefile you would like to open." 
+ % shapefiles + ) # Try to extract file-like objects from zipfile - shapefile = os.path.splitext(shapefile)[0] # root shapefile name - for ext in ['SHP','SHX','DBF','shp','shx','dbf']: + shapefile = os.path.splitext(shapefile)[ + 0 + ] # root shapefile name + for ext in ["SHP", "SHX", "DBF", "shp", "shx", "dbf"]: try: - member = archive.open(shapefile+'.'+ext) + member = archive.open(shapefile + "." + ext) # write zipfile member data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) fileobj.write(member.read()) fileobj.seek(0) setattr(self, ext.lower(), fileobj) @@ -967,44 +1034,57 @@ def __init__(self, *args, **kwargs): except: pass # Close and delete the temporary zipfile - try: zipfileobj.close() - except: pass + try: + zipfileobj.close() + except: + pass # Try to load shapefile - if (self.shp or self.dbf): + if self.shp or self.dbf: # Load and exit early self.load() return else: - raise ShapefileException("No shp or dbf file found in zipfile: %s" % path) + raise ShapefileException( + "No shp or dbf file found in zipfile: %s" % path + ) - elif path.startswith('http'): + elif path.startswith("http"): # Shapefile is from a url # Download each file to temporary path and treat as normal shapefile path urlinfo = urlparse(path) urlpath = urlinfo[2] - urlpath,_ = os.path.splitext(urlpath) + urlpath, _ = os.path.splitext(urlpath) shapefile = os.path.basename(urlpath) - for ext in ['shp','shx','dbf']: + for ext in ["shp", "shx", "dbf"]: try: _urlinfo = list(urlinfo) - _urlinfo[2] = urlpath + '.' + ext + _urlinfo[2] = urlpath + "." + ext _path = urlunparse(_urlinfo) - req = Request(_path, headers={'User-agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}) + req = Request( + _path, + headers={ + "User-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36" + }, + ) resp = urlopen(req) # write url data to a read+write tempfile and use as source, gets deleted on close() - fileobj = tempfile.NamedTemporaryFile(mode='w+b', delete=True) + fileobj = tempfile.NamedTemporaryFile( + mode="w+b", delete=True + ) fileobj.write(resp.read()) fileobj.seek(0) setattr(self, ext, fileobj) self._files_to_close.append(fileobj) except HTTPError: pass - if (self.shp or self.dbf): + if self.shp or self.dbf: # Load and exit early self.load() return else: - raise ShapefileException("No shp or dbf file found at url: %s" % path) + raise ShapefileException( + "No shp or dbf file found at url: %s" % path + ) else: # Local file path to a shapefile @@ -1057,14 +1137,18 @@ def __str__(self): """ Use some general info on the shapefile as __str__ """ - info = ['shapefile Reader'] + info = ["shapefile Reader"] if self.shp: - info.append(" {} shapes (type '{}')".format( - len(self), SHAPETYPE_LOOKUP[self.shapeType])) + info.append( + " {} shapes (type '{}')".format( + len(self), SHAPETYPE_LOOKUP[self.shapeType] + ) + ) if self.dbf: - info.append(' {} records ({} fields)'.format( - len(self), len(self.fields))) - return '\n'.join(info) + info.append( + " {} records ({} fields)".format(len(self), len(self.fields)) + ) + return "\n".join(info) def __enter__(self): """ @@ -1101,11 +1185,11 @@ def __len__(self): # Determine length of shp file shp = self.shp checkpoint = shp.tell() - shp.seek(0,2) + shp.seek(0, 2) shpLength = shp.tell() 
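                     # (The seek(0, 2)/tell() pair above measures the actual
                     # file size rather than trusting the length declared in
                     # the header; the .shp header is always 100 bytes, so the
                     # record scan starts at that offset.)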
shp.seek(100) # Do a fast shape iteration until end of file. - unpack = Struct('>2i').unpack + unpack = Struct(">2i").unpack offsets = [] pos = shp.tell() while pos < shpLength: @@ -1136,7 +1220,7 @@ def __iter__(self): def __geo_interface__(self): shaperecords = self.shapeRecords() fcollection = shaperecords.__geo_interface__ - fcollection['bbox'] = list(self.bbox) + fcollection["bbox"] = list(self.bbox) return fcollection @property @@ -1154,7 +1238,9 @@ def load(self, shapefile=None): self.load_shx(shapeName) self.load_dbf(shapeName) if not (self.shp or self.dbf): - raise ShapefileException("Unable to open %s.dbf or %s.shp." % (shapeName, shapeName)) + raise ShapefileException( + "Unable to open %s.dbf or %s.shp." % (shapeName, shapeName) + ) if self.shp: self.__shpHeader() if self.dbf: @@ -1166,7 +1252,7 @@ def load_shp(self, shapefile_name): """ Attempts to load file with .shp extension as both lower and upper case """ - shp_ext = 'shp' + shp_ext = "shp" try: self.shp = open("%s.%s" % (shapefile_name, shp_ext), "rb") self._files_to_close.append(self.shp) @@ -1181,7 +1267,7 @@ def load_shx(self, shapefile_name): """ Attempts to load file with .shx extension as both lower and upper case """ - shx_ext = 'shx' + shx_ext = "shx" try: self.shx = open("%s.%s" % (shapefile_name, shx_ext), "rb") self._files_to_close.append(self.shx) @@ -1196,7 +1282,7 @@ def load_dbf(self, shapefile_name): """ Attempts to load file with .dbf extension as both lower and upper case """ - dbf_ext = 'dbf' + dbf_ext = "dbf" try: self.dbf = open("%s.%s" % (shapefile_name, dbf_ext), "rb") self._files_to_close.append(self.dbf) @@ -1213,7 +1299,7 @@ def __del__(self): def close(self): # Close any files that the reader opened (but not those given by user) for attribute in self._files_to_close: - if hasattr(attribute, 'close'): + if hasattr(attribute, "close"): try: attribute.close() except IOError: @@ -1224,7 +1310,9 @@ def __getFileObj(self, f): """Checks to see if the requested shapefile file object is available. If not a ShapefileException is raised.""" if not f: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object.") + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object." + ) if self.shp and self.shpLength is None: self.load() if self.dbf and len(self.fields) == 0: @@ -1238,27 +1326,30 @@ def __restrictIndex(self, i): rmax = self.numRecords - 1 if abs(i) > rmax: raise IndexError("Shape or Record index out of range.") - if i < 0: i = range(self.numRecords)[i] + if i < 0: + i = range(self.numRecords)[i] return i def __shpHeader(self): """Reads the header information from a .shp file.""" if not self.shp: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shp file found") + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. 
(no shp file found" + ) shp = self.shp # File length (16-bit word * 2 = bytes) shp.seek(24) self.shpLength = unpack(">i", shp.read(4))[0] * 2 # Shape type shp.seek(32) - self.shapeType= unpack(" NODATA: self.mbox.append(m) @@ -1279,8 +1370,8 @@ def __shape(self, oid=None, bbox=None): if shapeType == 0: record.points = [] # All shape types capable of having a bounding box - elif shapeType in (3,5,8,13,15,18,23,25,28,31): - record.bbox = _Array('d', unpack("<4d", f.read(32))) + elif shapeType in (3, 5, 8, 13, 15, 18, 23, 25, 28, 31): + record.bbox = _Array("d", unpack("<4d", f.read(32))) # if bbox specified and no overlap, skip this shape if bbox is not None and not bbox_overlap(bbox, record.bbox): # because we stop parsing this shape, skip to beginning of @@ -1288,33 +1379,33 @@ def __shape(self, oid=None, bbox=None): f.seek(next) return None # Shape types with parts - if shapeType in (3,5,13,15,23,25,31): + if shapeType in (3, 5, 13, 15, 23, 25, 31): nParts = unpack("= 16: (mmin, mmax) = unpack("<2d", f.read(16)) # Measure values less than -10e38 are nodata values according to the spec if next - f.tell() >= nPoints * 8: record.m = [] - for m in _Array('d', unpack("<%sd" % nPoints, f.read(nPoints * 8))): + for m in _Array("d", unpack("<%sd" % nPoints, f.read(nPoints * 8))): if m > NODATA: record.m.append(m) else: @@ -1322,8 +1413,8 @@ def __shape(self, oid=None, bbox=None): else: record.m = [None for _ in range(nPoints)] # Read a single point - if shapeType in (1,11,21): - record.points = [_Array('d', unpack("<2d", f.read(16)))] + if shapeType in (1, 11, 21): + record.points = [_Array("d", unpack("<2d", f.read(16)))] if bbox is not None: # create bounding box for Point by duplicating coordinates point_bbox = list(record.points[0] + record.points[0]) @@ -1335,7 +1426,7 @@ def __shape(self, oid=None, bbox=None): if shapeType == 11: record.z = list(unpack("= 8: (m,) = unpack("i", shx.read(4))[0] * 2) - 100 self.numShapes = shxRecordLength // 8 def __shxOffsets(self): - '''Reads the shape offset positions from a .shx file''' + """Reads the shape offset positions from a .shx file""" shx = self.shx if not shx: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no shx file found") + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no shx file found" + ) # Jump to the first record. shx.seek(100) # Each index record consists of two nrs, we only want the first one - shxRecords = _Array('i', shx.read(2 * self.numShapes * 4) ) - if sys.byteorder != 'big': + shxRecords = _Array("i", shx.read(2 * self.numShapes * 4)) + if sys.byteorder != "big": shxRecords.byteswap() self._offsets = [2 * el for el in shxRecords[::2]] @@ -1398,11 +1493,11 @@ def shape(self, i=0, bbox=None): if not offset: # Shx index not available. # Determine length of shp file - shp.seek(0,2) + shp.seek(0, 2) shpLength = shp.tell() shp.seek(100) # Do a fast shape iteration until the requested index or end of file. 
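             # (Each .shp record is preceded by an 8-byte header: a big-endian
             # record number followed by the record's content length counted
             # in 16-bit words. Reading those two ints is enough to hop ahead
             # 8 + 2 * length bytes to the next record without parsing any
             # geometry.)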
- unpack = Struct('>2i').unpack + unpack = Struct(">2i").unpack _i = 0 offset = shp.tell() while offset < shpLength: @@ -1417,7 +1512,11 @@ def shape(self, i=0, bbox=None): _i += 1 # If the index was not found, it likely means the .shp file is incomplete if _i != i: - raise ShapefileException('Shape index {} is out of bounds; the .shp file only contains {} shapes'.format(i, _i)) + raise ShapefileException( + "Shape index {} is out of bounds; the .shp file only contains {} shapes".format( + i, _i + ) + ) # Seek to the offset and read the shape shp.seek(offset) @@ -1443,7 +1542,7 @@ def iterShapes(self, bbox=None): # shp file length in the header. Can't trust # that so we seek to the end of the file # and figure it out. - shp.seek(0,2) + shp.seek(0, 2) shpLength = shp.tell() shp.seek(100) @@ -1477,12 +1576,15 @@ def iterShapes(self, bbox=None): def __dbfHeader(self): """Reads a dbf header. Xbase-related code borrows heavily from ActiveState Python Cookbook Recipe 362715 by Raymond Hettinger""" if not self.dbf: - raise ShapefileException("Shapefile Reader requires a shapefile or file-like object. (no dbf file found)") + raise ShapefileException( + "Shapefile Reader requires a shapefile or file-like object. (no dbf file found)" + ) dbf = self.dbf # read relevant header parts dbf.seek(0) - self.numRecords, self.__dbfHdrLength, self.__recordLength = \ - unpack("6i", 9994,0,0,0,0,0)) + f.write(pack(">6i", 9994, 0, 0, 0, 0, 0)) # File length (Bytes / 2 = 16-bit words) - if headerType == 'shp': + if headerType == "shp": f.write(pack(">i", self.__shpFileLength())) - elif headerType == 'shx': - f.write(pack('>i', ((100 + (self.shpNum * 8)) // 2))) + elif headerType == "shx": + f.write(pack(">i", ((100 + (self.shpNum * 8)) // 2))) # Version, Shape type if self.shapeType is None: self.shapeType = NULL @@ -1997,37 +2142,41 @@ def __shapefileHeader(self, fileObj, headerType='shp'): # In such cases of empty shapefiles, ESRI spec says the bbox values are 'unspecified'. # Not sure what that means, so for now just setting to 0s, which is the same behavior as in previous versions. # This would also make sense since the Z and M bounds are similarly set to 0 for non-Z/M type shapefiles. - bbox = [0,0,0,0] + bbox = [0, 0, 0, 0] f.write(pack("<4d", *bbox)) except error: - raise ShapefileException("Failed to write shapefile bounding box. Floats required.") + raise ShapefileException( + "Failed to write shapefile bounding box. Floats required." + ) else: - f.write(pack("<4d", 0,0,0,0)) + f.write(pack("<4d", 0, 0, 0, 0)) # Elevation - if self.shapeType in (11,13,15,18): + if self.shapeType in (11, 13, 15, 18): # Z values are present in Z type zbox = self.zbox() if zbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - zbox = [0,0] + zbox = [0, 0] else: # As per the ESRI shapefile spec, the zbox for non-Z type shapefiles are set to 0s - zbox = [0,0] + zbox = [0, 0] # Measure - if self.shapeType in (11,13,15,18,21,23,25,28,31): + if self.shapeType in (11, 13, 15, 18, 21, 23, 25, 28, 31): # M values are present in M or Z type mbox = self.mbox() if mbox is None: # means we have empty shapefile/only null geoms (see commentary on bbox above) - mbox = [0,0] + mbox = [0, 0] else: # As per the ESRI shapefile spec, the mbox for non-M type shapefiles are set to 0s - mbox = [0,0] + mbox = [0, 0] # Try writing try: f.write(pack("<4d", zbox[0], zbox[1], mbox[0], mbox[1])) except error: - raise ShapefileException("Failed to write shapefile elevation and measure values. 
Floats required.")
+            raise ShapefileException(
+                "Failed to write shapefile elevation and measure values. Floats required."
+            )
 
     def __dbfHeader(self):
         """Writes the dbf header and field descriptors."""
@@ -2037,32 +2186,43 @@ def __dbfHeader(self):
         year, month, day = time.localtime()[:3]
         year -= 1900
         # Get all fields, ignoring DeletionFlag if specified
-        fields = [field for field in self.fields if field[0] != 'DeletionFlag']
+        fields = [field for field in self.fields if field[0] != "DeletionFlag"]
         # Ensure has at least one field
         if not fields:
-            raise ShapefileException("Shapefile dbf file must contain at least one field.")
+            raise ShapefileException(
+                "Shapefile dbf file must contain at least one field."
+            )
         numRecs = self.recNum
         numFields = len(fields)
         headerLength = numFields * 32 + 33
         if headerLength >= 65535:
             raise ShapefileException(
-                "Shapefile dbf header length exceeds maximum length.")
+                "Shapefile dbf header length exceeds maximum length."
+            )
         recordLength = sum([int(field[2]) for field in fields]) + 1
-        header = pack(' 2 else 0)) for p in s.points]
         except error:
-            raise ShapefileException("Failed to write elevation values for record %s. Expected floats." % self.shpNum)
+            raise ShapefileException(
+                "Failed to write elevation values for record %s. Expected floats."
+                % self.shpNum
+            )
         # Write m extremes and values
         # When reading a file, pyshp converts NODATA m values to None, so here we make sure to convert them back to NODATA
         # Note: missing m values are autoset to NODATA.
-        if s.shapeType in (13,15,18,23,25,28,31):
+        if s.shapeType in (13, 15, 18, 23, 25, 28, 31):
             try:
                 f.write(pack("<2d", *self.__mbox(s)))
             except error:
-                raise ShapefileException("Failed to write measure extremes for record %s. Expected floats" % self.shpNum)
+                raise ShapefileException(
+                    "Failed to write measure extremes for record %s. Expected floats"
+                    % self.shpNum
+                )
             try:
-                if hasattr(s,"m"):
+                if hasattr(s, "m"):
                     # if m values are stored in attribute
-                    f.write(pack("<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m]))
+                    f.write(
+                        pack(
+                            "<%sd" % len(s.m),
+                            *[m if m is not None else NODATA for m in s.m],
+                        )
+                    )
                 else:
                     # if m values are stored as 3rd/4th dimension
                     # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m)
-                    mpos = 3 if s.shapeType in (13,15,18,31) else 2
-                    [f.write(pack("<d", p[mpos] if len(p) > mpos and p[mpos] is not None else NODATA)) for p in s.points]
+                    mpos = 3 if s.shapeType in (13, 15, 18, 31) else 2
+                    [
+                        f.write(
+                            pack(
+                                "<d",
+                                p[mpos]
+                                if len(p) > mpos and p[mpos] is not None
+                                else NODATA,
+                            )
+                        )
+                        for p in s.points
+                    ]
             except error:
-                raise ShapefileException("Failed to write measure values for record %s. Expected floats" % self.shpNum)
+                raise ShapefileException(
+                    "Failed to write measure values for record %s. Expected floats"
+                    % self.shpNum
+                )
         # Write a single point
-        if s.shapeType in (1,11,21):
+        if s.shapeType in (1, 11, 21):
             try:
                 f.write(pack("<2d", s.points[0][0], s.points[0][1]))
             except error:
-                raise ShapefileException("Failed to write point for record %s. Expected floats." % self.shpNum)
+                raise ShapefileException(
+                    "Failed to write point for record %s. Expected floats."
+                    % self.shpNum
+                )
         # Write a single Z value
         # Note: missing z values are autoset to 0, but not sure if this is ideal.
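         # (Per the ESRI spec a PointZ record must always carry x, y and z on
         # disk, with only the m value optional, so some filler value is
         # unavoidable when a shape is written without an explicit z.)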
if s.shapeType == 11: @@ -2182,7 +2383,10 @@ def __shpRecord(self, s): s.z = (0,) f.write(pack("i", length)) f.seek(finish) - return offset,length + return offset, length def __shxRecord(self, offset, length): - """Writes the shx records.""" - f = self.__getFileObj(self.shx) - try: - f.write(pack(">i", offset // 2)) - except error: - raise ShapefileException('The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones.') - f.write(pack(">i", length)) + """Writes the shx records.""" + f = self.__getFileObj(self.shx) + try: + f.write(pack(">i", offset // 2)) + except error: + raise ShapefileException( + "The .shp file has reached its file size limit > 4294967294 bytes (4.29 GB). To fix this, break up your file into multiple smaller ones." + ) + f.write(pack(">i", length)) def record(self, *recordList, **recordDict): """Creates a dbf attribute record. You can submit either a sequence of @@ -2247,7 +2462,7 @@ def record(self, *recordList, **recordDict): if self.autoBalance and self.recNum > self.shpNum: self.balance() - fieldCount = sum((1 for field in self.fields if field[0] != 'DeletionFlag')) + fieldCount = sum((1 for field in self.fields if field[0] != "DeletionFlag")) if recordList: record = list(recordList) while len(record) < fieldCount: @@ -2255,8 +2470,8 @@ def record(self, *recordList, **recordDict): elif recordDict: record = [] for field in self.fields: - if field[0] == 'DeletionFlag': - continue # ignore deletionflag field in case it was specified + if field[0] == "DeletionFlag": + continue # ignore deletionflag field in case it was specified if field[0] in recordDict: val = recordDict[field[0]] if val is None: @@ -2264,7 +2479,7 @@ def record(self, *recordList, **recordDict): else: record.append(val) else: - record.append("") # need empty value for missing dict entries + record.append("") # need empty value for missing dict entries else: # Blank fields for empty record record = ["" for _ in range(fieldCount)] @@ -2279,18 +2494,20 @@ def __dbfRecord(self, record): # cannot change the fields after this point self.__dbfHeader() # first byte of the record is deletion flag, always disabled - f.write(b' ') + f.write(b" ") # begin self.recNum += 1 - fields = (field for field in self.fields if field[0] != 'DeletionFlag') # ignore deletionflag field in case it was specified + fields = ( + field for field in self.fields if field[0] != "DeletionFlag" + ) # ignore deletionflag field in case it was specified for (fieldName, fieldType, size, deci), value in zip(fields, record): # write fieldType = fieldType.upper() size = int(size) - if fieldType in ("N","F"): + if fieldType in ("N", "F"): # numeric or float: number stored as a string, right justified, and padded with blanks to the width of the field. if value in MISSING: - value = b"*"*size # QGIS NULL + value = b"*" * size # QGIS NULL elif not deci: # force to int try: @@ -2301,42 +2518,54 @@ def __dbfRecord(self, record): except ValueError: # forcing directly to int failed, so was probably a float. 
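                         # (e.g. the string "12.7": float() parses it, then
                         # int() truncates toward zero before the rjust
                         # formatting below.)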
value = int(float(value)) - value = format(value, "d")[:size].rjust(size) # caps the size if exceeds the field size + value = format(value, "d")[:size].rjust( + size + ) # caps the size if exceeds the field size else: value = float(value) - value = format(value, ".%sf"%deci)[:size].rjust(size) # caps the size if exceeds the field size + value = format(value, ".%sf" % deci)[:size].rjust( + size + ) # caps the size if exceeds the field size elif fieldType == "D": # date: 8 bytes - date stored as a string in the format YYYYMMDD. if isinstance(value, date): - value = '{:04d}{:02d}{:02d}'.format(value.year, value.month, value.day) + value = "{:04d}{:02d}{:02d}".format( + value.year, value.month, value.day + ) elif isinstance(value, list) and len(value) == 3: - value = '{:04d}{:02d}{:02d}'.format(*value) + value = "{:04d}{:02d}{:02d}".format(*value) elif value in MISSING: - value = b'0' * 8 # QGIS NULL for date type + value = b"0" * 8 # QGIS NULL for date type elif is_string(value) and len(value) == 8: - pass # value is already a date string + pass # value is already a date string else: - raise ShapefileException("Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value.") - elif fieldType == 'L': + raise ShapefileException( + "Date values must be either a datetime.date object, a list, a YYYYMMDD string, or a missing value." + ) + elif fieldType == "L": # logical: 1 byte - initialized to 0x20 (space) otherwise T or F. if value in MISSING: - value = b' ' # missing is set to space - elif value in [True,1]: - value = b'T' - elif value in [False,0]: - value = b'F' + value = b" " # missing is set to space + elif value in [True, 1]: + value = b"T" + elif value in [False, 0]: + value = b"F" else: - value = b' ' # unknown is set to space + value = b" " # unknown is set to space else: # anything else is forced to string, truncated to the length of the field value = b(value, self.encoding, self.encodingErrors)[:size].ljust(size) if not isinstance(value, bytes): # just in case some of the numeric format() and date strftime() results are still in unicode (Python 3 only) - value = b(value, 'ascii', self.encodingErrors) # should be default ascii encoding + value = b( + value, "ascii", self.encodingErrors + ) # should be default ascii encoding if len(value) != size: raise ShapefileException( "Shapefile Writer unable to pack incorrect sized value" - " (size %d) into field '%s' (size %d)." % (len(value), fieldName, size)) + " (size %d) into field '%s' (size %d)." + % (len(value), fieldName, size) + ) f.write(value) def balance(self): @@ -2348,12 +2577,10 @@ def balance(self): while self.recNum < self.shpNum: self.record() - def null(self): """Creates a null shape.""" self.shape(Shape(NULL)) - def point(self, x, y): """Creates a POINT shape.""" shapeType = POINT @@ -2378,12 +2605,13 @@ def pointz(self, x, y, z=0, m=None): pointShape.points.append([x, y, z, m]) self.shape(pointShape) - def multipoint(self, points): """Creates a MULTIPOINT shape. Points is a list of xy values.""" shapeType = MULTIPOINT - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=points, shapeType=shapeType) def multipointm(self, points): @@ -2391,7 +2619,9 @@ def multipointm(self, points): Points is a list of xym values. 
If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = MULTIPOINTM - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=points, shapeType=shapeType) def multipointz(self, points): @@ -2400,10 +2630,11 @@ def multipointz(self, points): If the z (elevation) value is not included, it defaults to 0. If the m (measure) value is not included, it defaults to None (NoData).""" shapeType = MULTIPOINTZ - points = [points] # nest the points inside a list to be compatible with the generic shapeparts method + points = [ + points + ] # nest the points inside a list to be compatible with the generic shapeparts method self._shapeparts(parts=points, shapeType=shapeType) - def line(self, lines): """Creates a POLYLINE shape. Lines is a collection of lines, each made up of a list of xy values.""" @@ -2425,7 +2656,6 @@ def linez(self, lines): shapeType = POLYLINEZ self._shapeparts(parts=lines, shapeType=shapeType) - def poly(self, polys): """Creates a POLYGON shape. Polys is a collection of polygons, each made up of a list of xy values. @@ -2453,7 +2683,6 @@ def polyz(self, polys): shapeType = POLYGONZ self._shapeparts(parts=polys, shapeType=shapeType) - def multipatch(self, parts, partTypes): """Creates a MULTIPATCH shape. Parts is a collection of 3D surface patches, each made up of a list of xyzm values. @@ -2479,7 +2708,6 @@ def multipatch(self, parts, partTypes): # write the shape self.shape(polyShape) - def _shapeparts(self, parts, shapeType): """Internal method for adding a shape that has multiple collections of points (parts): lines, polygons, and multipoint shapes. @@ -2488,7 +2716,7 @@ def _shapeparts(self, parts, shapeType): polyShape.parts = [] polyShape.points = [] # Make sure polygon rings (parts) are closed - if shapeType in (5,15,25,31): + if shapeType in (5, 15, 25, 31): for part in parts: if part[0] != part[-1]: part.append(part[0]) @@ -2515,20 +2743,23 @@ def field(self, name, fieldType="C", size="50", decimal=0): decimal = 0 if len(self.fields) >= 2046: raise ShapefileException( - "Shapefile Writer reached maximum number of fields: 2046.") + "Shapefile Writer reached maximum number of fields: 2046." 
+ ) self.fields.append((name, fieldType, size, decimal)) # Begin Testing def test(**kwargs): import doctest + doctest.NORMALIZE_WHITESPACE = 1 - verbosity = kwargs.get('verbose', 0) + verbosity = kwargs.get("verbose", 0) if verbosity == 0: - print('Running doctests...') + print("Running doctests...") # ignore py2-3 unicode differences import re + class Py23DocChecker(doctest.OutputChecker): def check_output(self, want, got, optionflags): if sys.version_info[0] == 2: @@ -2536,13 +2767,20 @@ def check_output(self, want, got, optionflags): got = re.sub('u"(.*?)"', '"\\1"', got) res = doctest.OutputChecker.check_output(self, want, got, optionflags) return res + def summarize(self): doctest.OutputChecker.summarize(True) # run tests runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) - with open("README.md","rb") as fobj: - test = doctest.DocTestParser().get_doctest(string=fobj.read().decode("utf8").replace('\r\n','\n'), globs={}, name="README", filename="README.md", lineno=0) + with open("README.md", "rb") as fobj: + test = doctest.DocTestParser().get_doctest( + string=fobj.read().decode("utf8").replace("\r\n", "\n"), + globs={}, + name="README", + filename="README.md", + lineno=0, + ) failure_count, test_count = runner.run(test) # print results @@ -2550,12 +2788,13 @@ def summarize(self): runner.summarize(True) else: if failure_count == 0: - print('All test passed successfully') + print("All test passed successfully") elif failure_count > 0: runner.summarize(verbosity) return failure_count + if __name__ == "__main__": """ Doctests are contained in the file 'README.md', and are tested using the built-in diff --git a/test_shapefile.py b/test_shapefile.py index f5dd7187..ae173ce1 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -19,184 +19,418 @@ import shapefile # define various test shape tuples of (type, points, parts indexes, and expected geo interface output) -geo_interface_tests = [ (shapefile.POINT, # point - [(1,1)], - [], - {'type':'Point','coordinates':(1,1)} - ), - (shapefile.MULTIPOINT, # multipoint - [(1,1),(2,1),(2,2)], - [], - {'type':'MultiPoint','coordinates':[(1,1),(2,1),(2,2)]} - ), - (shapefile.POLYLINE, # single linestring - [(1,1),(2,1)], - [0], - {'type':'LineString','coordinates':[(1,1),(2,1)]} - ), - (shapefile.POLYLINE, # multi linestring - [(1,1),(2,1), # line 1 - (10,10),(20,10)], # line 2 - [0,2], - {'type':'MultiLineString','coordinates':[ - [(1,1),(2,1)], # line 1 - [(10,10),(20,10)] # line 2 - ]} - ), - (shapefile.POLYGON, # single polygon, no holes - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior - ], - [0], - {'type':'Polygon','coordinates':[ - [(1,1),(1,9),(9,9),(9,1),(1,1)], - ]} - ), - (shapefile.POLYGON, # single polygon, holes (ordered) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1 - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 2 - ], - [0,5,5+5], - {'type':'Polygon','coordinates':[ - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ]} - ), - (shapefile.POLYGON, # single polygon, holes (unordered) - [ - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1 - (1,1),(1,9),(9,9),(9,1),(1,1), # exterior - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 2 - ], - [0,5,5+5], - {'type':'Polygon','coordinates':[ - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ]} - ), - (shapefile.POLYGON, # multi polygon, no holes - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 
- (11,11),(11,19),(19,19),(19,11),(11,11), # exterior - ], - [0,5], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], - ], - [ # poly 2 - [(11,11),(11,19),(19,19),(19,11),(11,11)], - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, holes (unordered) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (11,11),(11,19),(19,19),(19,11),(11,11), # exterior 2 - (12,12),(14,12),(14,14),(12,14),(12,12), # hole 2.1 - (15,15),(17,15),(17,17),(15,17),(15,15), # hole 2.2 - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1.1 - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 1.2 - ], - [0,5,10,15,20,25], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ], - [ # poly 2 - [(11,11),(11,19),(19,19),(19,11),(11,11)], # exterior - [(12,12),(14,12),(14,14),(12,14),(12,12)], # hole 1 - [(15,15),(17,15),(17,17),(15,17),(15,15)], # hole 2 - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (3,3),(3,7),(7,7),(7,3),(3,3), # exterior 2 - (4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5), # exterior 3 - (4,4),(6,4),(6,6),(4,6),(4,4), # hole 2.1 - (2,2),(8,2),(8,8),(2,8),(2,2), # hole 1.1 - ], - [0,5,10,15,20], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior 1 - [(2,2),(8,2),(8,8),(2,8),(2,2)], # hole 1.1 - ], - [ # poly 2 - [(3,3),(3,7),(7,7),(7,3),(3,3)], # exterior 2 - [(4,4),(6,4),(6,6),(4,6),(4,4)], # hole 2.1 - ], - [ # poly 3 - [(4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5)], # exterior 3 - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered and tricky holes designed to throw off ring_sample() test) - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (3,3),(3,7),(7,7),(7,3),(3,3), # exterior 2 - (4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5), # exterior 3 - (4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4), # hole 2.1 (hole has duplicate coords) - (2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2), # hole 1.1 (hole coords form straight line and starts in concave orientation) - ], - [0,5,10,15,20+3], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior 1 - [(2,2),(3,3),(4,2),(8,2),(8,8),(4,8),(2,8),(2,4),(2,2)], # hole 1.1 - ], - [ # poly 2 - [(3,3),(3,7),(7,7),(7,3),(3,3)], # exterior 2 - [(4,4),(4,4),(6,4),(6,4),(6,4),(6,6),(4,6),(4,4)], # hole 2.1 - ], - [ # poly 3 - [(4.5,4.5),(4.5,5.5),(5.5,5.5),(5.5,4.5),(4.5,4.5)], # exterior 3 - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, holes incl orphaned holes (unordered), should raise warning - [(1,1),(1,9),(9,9),(9,1),(1,1), # exterior 1 - (11,11),(11,19),(19,19),(19,11),(11,11), # exterior 2 - (12,12),(14,12),(14,14),(12,14),(12,12), # hole 2.1 - (15,15),(17,15),(17,17),(15,17),(15,15), # hole 2.2 - (95,95),(97,95),(97,97),(95,97),(95,95), # hole x.1 (orphaned hole, should be interpreted as exterior) - (2,2),(4,2),(4,4),(2,4),(2,2), # hole 1.1 - (5,5),(7,5),(7,7),(5,7),(5,5), # hole 1.2 - ], - [0,5,10,15,20,25,30], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(1,9),(9,9),(9,1),(1,1)], # exterior - [(2,2),(4,2),(4,4),(2,4),(2,2)], # hole 1 - [(5,5),(7,5),(7,7),(5,7),(5,5)], # hole 2 - ], - [ # poly 2 - [(11,11),(11,19),(19,19),(19,11),(11,11)], # exterior - [(12,12),(14,12),(14,14),(12,14),(12,12)], # hole 1 - [(15,15),(17,15),(17,17),(15,17),(15,15)], 
# hole 2 - ], - [ # poly 3 (orphaned hole) - [(95,95),(97,95),(97,97),(95,97),(95,95)], # exterior - ], - ]} - ), - (shapefile.POLYGON, # multi polygon, exteriors with wrong orientation (be nice and interpret as such), should raise warning - [(1,1),(9,1),(9,9),(1,9),(1,1), # exterior with hole-orientation - (11,11),(19,11),(19,19),(11,19),(11,11), # exterior with hole-orientation - ], - [0,5], - {'type':'MultiPolygon','coordinates':[ - [ # poly 1 - [(1,1),(9,1),(9,9),(1,9),(1,1)], - ], - [ # poly 2 - [(11,11),(19,11),(19,19),(11,19),(11,11)], - ], - ]} - ), - ] +geo_interface_tests = [ + ( + shapefile.POINT, # point + [(1, 1)], + [], + {"type": "Point", "coordinates": (1, 1)}, + ), + ( + shapefile.MULTIPOINT, # multipoint + [(1, 1), (2, 1), (2, 2)], + [], + {"type": "MultiPoint", "coordinates": [(1, 1), (2, 1), (2, 2)]}, + ), + ( + shapefile.POLYLINE, # single linestring + [(1, 1), (2, 1)], + [0], + {"type": "LineString", "coordinates": [(1, 1), (2, 1)]}, + ), + ( + shapefile.POLYLINE, # multi linestring + [ + (1, 1), + (2, 1), # line 1 + (10, 10), + (20, 10), + ], # line 2 + [0, 2], + { + "type": "MultiLineString", + "coordinates": [ + [(1, 1), (2, 1)], # line 1 + [(10, 10), (20, 10)], # line 2 + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, no holes + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + ], + [0], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, holes (ordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 2 + ], + [0, 5, 5 + 5], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + }, + ), + ( + shapefile.POLYGON, # single polygon, holes (unordered) + [ + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1 + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 2 + ], + [0, 5, 5 + 5], + { + "type": "Polygon", + "coordinates": [ + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, no holes + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior + ], + [0, 5], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, holes (unordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior 2 + (12, 12), + (14, 12), + (14, 14), + (12, 14), + (12, 12), # hole 2.1 + (15, 15), + (17, 15), + (17, 17), + (15, 17), + (15, 15), # hole 2.2 + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1.1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 1.2 + ], + [0, 5, 10, 15, 20, 25], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], 
+ [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], # exterior + [(12, 12), (14, 12), (14, 14), (12, 14), (12, 12)], # hole 1 + [(15, 15), (17, 15), (17, 17), (15, 17), (15, 15)], # hole 2 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (3, 3), + (3, 7), + (7, 7), + (7, 3), + (3, 3), # exterior 2 + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), # exterior 3 + (4, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), # hole 2.1 + (2, 2), + (8, 2), + (8, 8), + (2, 8), + (2, 2), # hole 1.1 + ], + [0, 5, 10, 15, 20], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior 1 + [(2, 2), (8, 2), (8, 8), (2, 8), (2, 2)], # hole 1.1 + ], + [ # poly 2 + [(3, 3), (3, 7), (7, 7), (7, 3), (3, 3)], # exterior 2 + [(4, 4), (6, 4), (6, 6), (4, 6), (4, 4)], # hole 2.1 + ], + [ # poly 3 + [ + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), + ], # exterior 3 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, nested exteriors with holes (unordered and tricky holes designed to throw off ring_sample() test) + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (3, 3), + (3, 7), + (7, 7), + (7, 3), + (3, 3), # exterior 2 + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), # exterior 3 + (4, 4), + (4, 4), + (6, 4), + (6, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), # hole 2.1 (hole has duplicate coords) + (2, 2), + (3, 3), + (4, 2), + (8, 2), + (8, 8), + (4, 8), + (2, 8), + (2, 4), + ( + 2, + 2, + ), # hole 1.1 (hole coords form straight line and starts in concave orientation) + ], + [0, 5, 10, 15, 20 + 3], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior 1 + [ + (2, 2), + (3, 3), + (4, 2), + (8, 2), + (8, 8), + (4, 8), + (2, 8), + (2, 4), + (2, 2), + ], # hole 1.1 + ], + [ # poly 2 + [(3, 3), (3, 7), (7, 7), (7, 3), (3, 3)], # exterior 2 + [ + (4, 4), + (4, 4), + (6, 4), + (6, 4), + (6, 4), + (6, 6), + (4, 6), + (4, 4), + ], # hole 2.1 + ], + [ # poly 3 + [ + (4.5, 4.5), + (4.5, 5.5), + (5.5, 5.5), + (5.5, 4.5), + (4.5, 4.5), + ], # exterior 3 + ], + ], + }, + ), + ( + shapefile.POLYGON, # multi polygon, holes incl orphaned holes (unordered), should raise warning + [ + (1, 1), + (1, 9), + (9, 9), + (9, 1), + (1, 1), # exterior 1 + (11, 11), + (11, 19), + (19, 19), + (19, 11), + (11, 11), # exterior 2 + (12, 12), + (14, 12), + (14, 14), + (12, 14), + (12, 12), # hole 2.1 + (15, 15), + (17, 15), + (17, 17), + (15, 17), + (15, 15), # hole 2.2 + (95, 95), + (97, 95), + (97, 97), + (95, 97), + (95, 95), # hole x.1 (orphaned hole, should be interpreted as exterior) + (2, 2), + (4, 2), + (4, 4), + (2, 4), + (2, 2), # hole 1.1 + (5, 5), + (7, 5), + (7, 7), + (5, 7), + (5, 5), # hole 1.2 + ], + [0, 5, 10, 15, 20, 25, 30], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (1, 9), (9, 9), (9, 1), (1, 1)], # exterior + [(2, 2), (4, 2), (4, 4), (2, 4), (2, 2)], # hole 1 + [(5, 5), (7, 5), (7, 7), (5, 7), (5, 5)], # hole 2 + ], + [ # poly 2 + [(11, 11), (11, 19), (19, 19), (19, 11), (11, 11)], # exterior + [(12, 12), (14, 12), (14, 14), (12, 14), (12, 12)], # hole 1 + [(15, 15), (17, 15), (17, 17), (15, 17), (15, 15)], # hole 2 + ], + [ # poly 3 (orphaned hole) + [(95, 95), (97, 95), (97, 97), (95, 97), (95, 95)], # exterior + ], + ], + }, + ), + ( + shapefile.POLYGON, 
# multi polygon, exteriors with wrong orientation (be nice and interpret as such), should raise warning + [ + (1, 1), + (9, 1), + (9, 9), + (1, 9), + (1, 1), # exterior with hole-orientation + (11, 11), + (19, 11), + (19, 19), + (11, 19), + (11, 11), # exterior with hole-orientation + ], + [0, 5], + { + "type": "MultiPolygon", + "coordinates": [ + [ # poly 1 + [(1, 1), (9, 1), (9, 9), (1, 9), (1, 1)], + ], + [ # poly 2 + [(11, 11), (19, 11), (19, 19), (11, 19), (11, 11)], + ], + ], + }, + ), +] + def test_empty_shape_geo_interface(): """ @@ -206,7 +440,8 @@ def test_empty_shape_geo_interface(): """ shape = shapefile.Shape() with pytest.raises(Exception): - getattr(shape, '__geo_interface__') + getattr(shape, "__geo_interface__") + @pytest.mark.parametrize("typ,points,parts,expected", geo_interface_tests) def test_expected_shape_geo_interface(typ, points, parts, expected): @@ -222,22 +457,22 @@ def test_expected_shape_geo_interface(typ, points, parts, expected): def test_reader_geo_interface(): with shapefile.Reader("shapefiles/blockgroups") as r: geoj = r.__geo_interface__ - assert geoj['type'] == 'FeatureCollection' - assert 'bbox' in geoj + assert geoj["type"] == "FeatureCollection" + assert "bbox" in geoj assert json.dumps(geoj) def test_shapes_geo_interface(): with shapefile.Reader("shapefiles/blockgroups") as r: geoj = r.shapes().__geo_interface__ - assert geoj['type'] == 'GeometryCollection' + assert geoj["type"] == "GeometryCollection" assert json.dumps(geoj) def test_shaperecords_geo_interface(): with shapefile.Reader("shapefiles/blockgroups") as r: geoj = r.shapeRecords().__geo_interface__ - assert geoj['type'] == 'FeatureCollection' + assert geoj["type"] == "FeatureCollection" assert json.dumps(geoj) @@ -299,14 +534,18 @@ def test_reader_zip(): pass # test specifying the path when reading multi-shapefile zipfile (with extension) - with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp") as sf: + with shapefile.Reader( + "shapefiles/blockgroups_multishapefile.zip/blockgroups2.shp" + ) as sf: for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True # test specifying the path when reading multi-shapefile zipfile (without extension) - with shapefile.Reader("shapefiles/blockgroups_multishapefile.zip/blockgroups2") as sf: + with shapefile.Reader( + "shapefiles/blockgroups_multishapefile.zip/blockgroups2" + ) as sf: for __recShape in sf.iterShapeRecords(): pass assert len(sf) > 0 @@ -346,9 +585,9 @@ def test_reader_close_filelike(): """ # note uses an actual shapefile from # the projects "shapefiles" directory - shp = open("shapefiles/blockgroups.shp", mode='rb') - shx = open("shapefiles/blockgroups.shx", mode='rb') - dbf = open("shapefiles/blockgroups.dbf", mode='rb') + shp = open("shapefiles/blockgroups.shp", mode="rb") + shx = open("shapefiles/blockgroups.shx", mode="rb") + dbf = open("shapefiles/blockgroups.dbf", mode="rb") sf = shapefile.Reader(shp=shp, shx=shx, dbf=dbf) sf.close() @@ -389,9 +628,9 @@ def test_reader_context_filelike(): """ # note uses an actual shapefile from # the projects "shapefiles" directory - shp = open("shapefiles/blockgroups.shp", mode='rb') - shx = open("shapefiles/blockgroups.shx", mode='rb') - dbf = open("shapefiles/blockgroups.dbf", mode='rb') + shp = open("shapefiles/blockgroups.shp", mode="rb") + shx = open("shapefiles/blockgroups.shx", mode="rb") + dbf = open("shapefiles/blockgroups.dbf", mode="rb") with shapefile.Reader(shp=shp, shx=shx, dbf=dbf) as 
sf: pass @@ -410,7 +649,7 @@ def test_reader_shapefile_type(): is returned correctly. """ with shapefile.Reader("shapefiles/blockgroups") as sf: - assert sf.shapeType == 5 # 5 means Polygon + assert sf.shapeType == 5 # 5 means Polygon assert sf.shapeType == shapefile.POLYGON assert sf.shapeTypeName == "POLYGON" @@ -428,7 +667,7 @@ def test_reader_shapefile_length(): def test_shape_metadata(): with shapefile.Reader("shapefiles/blockgroups") as sf: shape = sf.shape(0) - assert shape.shapeType == 5 # Polygon + assert shape.shapeType == 5 # Polygon assert shape.shapeType == shapefile.POLYGON assert sf.shapeTypeName == "POLYGON" @@ -445,10 +684,10 @@ def test_reader_fields(): assert isinstance(fields, list) field = fields[0] - assert isinstance(field[0], str) # field name - assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type - assert isinstance(field[2], int) # field length - assert isinstance(field[3], int) # decimal length + assert isinstance(field[0], str) # field name + assert field[1] in ["C", "N", "F", "L", "D", "M"] # field type + assert isinstance(field[2], int) # field length + assert isinstance(field[3], int) # decimal length def test_reader_shapefile_extension_ignored(): @@ -484,7 +723,7 @@ def test_reader_dbf_only(): with shapefile.Reader(dbf="shapefiles/blockgroups.dbf") as sf: assert len(sf) == 663 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_shp_shx_only(): @@ -493,7 +732,9 @@ def test_reader_shp_shx_only(): shp and shx argument to the shapefile reader reads just the shp and shx file. """ - with shapefile.Reader(shp="shapefiles/blockgroups.shp", shx="shapefiles/blockgroups.shx") as sf: + with shapefile.Reader( + shp="shapefiles/blockgroups.shp", shx="shapefiles/blockgroups.shx" + ) as sf: assert len(sf) == 663 shape = sf.shape(3) assert len(shape.points) == 173 @@ -505,12 +746,14 @@ def test_reader_shp_dbf_only(): shp and shx argument to the shapefile reader reads just the shp and dbf file. """ - with shapefile.Reader(shp="shapefiles/blockgroups.shp", dbf="shapefiles/blockgroups.dbf") as sf: + with shapefile.Reader( + shp="shapefiles/blockgroups.shp", dbf="shapefiles/blockgroups.dbf" + ) as sf: assert len(sf) == 663 shape = sf.shape(3) assert len(shape.points) == 173 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_shp_only(): @@ -534,7 +777,7 @@ def test_reader_filelike_dbf_only(): with shapefile.Reader(dbf=open("shapefiles/blockgroups.dbf", "rb")) as sf: assert len(sf) == 663 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_filelike_shp_shx_only(): @@ -543,7 +786,10 @@ def test_reader_filelike_shp_shx_only(): shp and shx argument to the shapefile reader reads just the shp and shx file. """ - with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb"), shx=open("shapefiles/blockgroups.shx", "rb")) as sf: + with shapefile.Reader( + shp=open("shapefiles/blockgroups.shp", "rb"), + shx=open("shapefiles/blockgroups.shx", "rb"), + ) as sf: assert len(sf) == 663 shape = sf.shape(3) assert len(shape.points) == 173 @@ -555,12 +801,15 @@ def test_reader_filelike_shp_dbf_only(): shp and shx argument to the shapefile reader reads just the shp and dbf file. 
""" - with shapefile.Reader(shp=open("shapefiles/blockgroups.shp", "rb"), dbf=open("shapefiles/blockgroups.dbf", "rb")) as sf: + with shapefile.Reader( + shp=open("shapefiles/blockgroups.shp", "rb"), + dbf=open("shapefiles/blockgroups.dbf", "rb"), + ) as sf: assert len(sf) == 663 shape = sf.shape(3) assert len(shape.points) == 173 record = sf.record(3) - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_reader_filelike_shp_only(): @@ -619,7 +868,9 @@ def test_record_attributes(fields=None): else: # default all fields record = full_record - fields = [field[0] for field in sf.fields[1:]] # fieldnames, sans del flag + fields = [ + field[0] for field in sf.fields[1:] + ] # fieldnames, sans del flag # check correct length assert len(record) == len(set(fields)) # check record values (should be in same order as shapefile fields) @@ -627,7 +878,9 @@ def test_record_attributes(fields=None): for field in sf.fields: field_name = field[0] if field_name in fields: - assert record[i] == record[field_name] == getattr(record, field_name) + assert ( + record[i] == record[field_name] == getattr(record, field_name) + ) i += 1 @@ -636,7 +889,7 @@ def test_record_subfields(): Assert that reader correctly retrieves only a subset of fields when specified. """ - fields = ["AREA","POP1990","MALES","FEMALES","MOBILEHOME"] + fields = ["AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"] test_record_attributes(fields=fields) @@ -646,7 +899,7 @@ def test_record_subfields_unordered(): of fields when specified, given in random order but retrieved in the order of the shapefile fields. """ - fields = sorted(["AREA","POP1990","MALES","FEMALES","MOBILEHOME"]) + fields = sorted(["AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"]) test_record_attributes(fields=fields) @@ -654,7 +907,7 @@ def test_record_subfields_delflag_notvalid(): """ Assert that reader does not consider DeletionFlag as a valid field name. """ - fields = ["DeletionFlag","AREA","POP1990","MALES","FEMALES","MOBILEHOME"] + fields = ["DeletionFlag", "AREA", "POP1990", "MALES", "FEMALES", "MOBILEHOME"] with pytest.raises(ValueError): test_record_attributes(fields=fields) @@ -664,7 +917,7 @@ def test_record_subfields_duplicates(): Assert that reader correctly retrieves only a subset of fields when specified, handling duplicate input fields. 
""" - fields = ["AREA","AREA","AREA","MALES","MALES","MOBILEHOME"] + fields = ["AREA", "AREA", "AREA", "MALES", "MALES", "MOBILEHOME"] test_record_attributes(fields=fields) # check that only 3 values with shapefile.Reader("shapefiles/blockgroups") as sf: @@ -709,13 +962,13 @@ def test_record_oid(): record = sf.record(i) assert record.oid == i - for i,record in enumerate(sf.records()): + for i, record in enumerate(sf.records()): assert record.oid == i - for i,record in enumerate(sf.iterRecords()): + for i, record in enumerate(sf.iterRecords()): assert record.oid == i - for i,shaperec in enumerate(sf.iterShapeRecords()): + for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.record.oid == i @@ -729,13 +982,13 @@ def test_shape_oid(): shape = sf.shape(i) assert shape.oid == i - for i,shape in enumerate(sf.shapes()): + for i, shape in enumerate(sf.shapes()): assert shape.oid == i - for i,shape in enumerate(sf.iterShapes()): + for i, shape in enumerate(sf.iterShapes()): assert shape.oid == i - for i,shaperec in enumerate(sf.iterShapeRecords()): + for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.shape.oid == i @@ -745,27 +998,29 @@ def test_shape_oid_no_shx(): its index in the shapefile, when shx file is missing. """ basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - dbf = open(basename + ".dbf", 'rb') - with shapefile.Reader(shp=shp, dbf=dbf) as sf, \ - shapefile.Reader(basename) as sf_expected: + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") + with ( + shapefile.Reader(shp=shp, dbf=dbf) as sf, + shapefile.Reader(basename) as sf_expected, + ): for i in range(len(sf)): shape = sf.shape(i) assert shape.oid == i shape_expected = sf_expected.shape(i) assert shape.__geo_interface__ == shape_expected.__geo_interface__ - for i,shape in enumerate(sf.shapes()): + for i, shape in enumerate(sf.shapes()): assert shape.oid == i shape_expected = sf_expected.shape(i) assert shape.__geo_interface__ == shape_expected.__geo_interface__ - for i,shape in enumerate(sf.iterShapes()): + for i, shape in enumerate(sf.iterShapes()): assert shape.oid == i shape_expected = sf_expected.shape(i) assert shape.__geo_interface__ == shape_expected.__geo_interface__ - for i,shaperec in enumerate(sf.iterShapeRecords()): + for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.shape.oid == i shape_expected = sf_expected.shape(i) assert shaperec.shape.__geo_interface__ == shape_expected.__geo_interface__ @@ -791,8 +1046,8 @@ def test_reader_offsets_no_shx(): the offsets unless necessary, i.e. reading all the shapes. """ basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - dbf = open(basename + ".dbf", 'rb') + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") with shapefile.Reader(shp=shp, dbf=dbf) as sf: # offsets should not be built during loading assert not sf._offsets @@ -805,7 +1060,6 @@ def test_reader_offsets_no_shx(): assert len(sf._offsets) == len(shapes) - def test_reader_numshapes(): """ Assert that reader reads the numShapes attribute from the @@ -826,8 +1080,8 @@ def test_reader_numshapes_no_shx(): reading all the shapes will set the numShapes attribute. 
""" basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - dbf = open(basename + ".dbf", 'rb') + shp = open(basename + ".shp", "rb") + dbf = open(basename + ".dbf", "rb") with shapefile.Reader(shp=shp, dbf=dbf) as sf: # numShapes should be unknown due to missing shx file assert sf.numShapes == None @@ -861,7 +1115,7 @@ def test_reader_len_dbf_only(): is equal to length of all records. """ basename = "shapefiles/blockgroups" - dbf = open(basename + ".dbf", 'rb') + dbf = open(basename + ".dbf", "rb") with shapefile.Reader(dbf=dbf) as sf: assert len(sf) == len(sf.records()) @@ -872,8 +1126,8 @@ def test_reader_len_no_dbf(): is equal to length of all shapes. """ basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') - shx = open(basename + ".shx", 'rb') + shp = open(basename + ".shp", "rb") + shx = open(basename + ".shx", "rb") with shapefile.Reader(shp=shp, shx=shx) as sf: assert len(sf) == len(sf.shapes()) @@ -884,7 +1138,7 @@ def test_reader_len_no_dbf_shx(): is equal to length of all shapes. """ basename = "shapefiles/blockgroups" - shp = open(basename + ".shp", 'rb') + shp = open(basename + ".shp", "rb") with shapefile.Reader(shp=shp) as sf: assert len(sf) == len(sf.shapes()) @@ -902,10 +1156,10 @@ def test_reader_corrupt_files(): # add 10 line geoms for _ in range(10): w.record("value") - w.line([[(1,1),(1,2),(2,2)]]) + w.line([[(1, 1), (1, 2), (2, 2)]]) # add junk byte data to end of dbf and shp files - w.dbf.write(b'12345') - w.shp.write(b'12345') + w.dbf.write(b"12345") + w.shp.write(b"12345") # read the corrupt shapefile and assert that it reads correctly with shapefile.Reader(basename) as sf: @@ -958,7 +1212,7 @@ def test_bboxfilter_shapes(): # compare assert len(shapes) == len(manual) # check that they line up - for shape,man in zip(shapes,manual): + for shape, man in zip(shapes, manual): assert shape.oid == man.oid assert shape.__geo_interface__ == man.__geo_interface__ @@ -991,7 +1245,7 @@ def test_bboxfilter_itershapes(): # compare assert len(shapes) == len(manual) # check that they line up - for shape,man in zip(shapes,manual): + for shape, man in zip(shapes, manual): assert shape.oid == man.oid assert shape.__geo_interface__ == man.__geo_interface__ @@ -1031,7 +1285,7 @@ def test_bboxfilter_shaperecords(): # compare assert len(shaperecs) == len(manual) # check that they line up - for shaperec,man in zip(shaperecs,manual): + for shaperec, man in zip(shaperecs, manual): # oids assert shaperec.shape.oid == shaperec.record.oid # same shape as manual @@ -1059,7 +1313,7 @@ def test_bboxfilter_itershaperecords(): # compare assert len(shaperecs) == len(manual) # check that they line up - for shaperec,man in zip(shaperecs,manual): + for shaperec, man in zip(shaperecs, manual): # oids assert shaperec.shape.oid == shaperec.record.oid # same shape as manual @@ -1112,7 +1366,7 @@ def test_shaperecord_record(): shaperec = sf.shapeRecord(3) record = shaperec.record - assert record[1:3] == ['060750601001', 4715] + assert record[1:3] == ["060750601001", 4715] def test_write_field_name_limit(tmpdir): @@ -1121,11 +1375,11 @@ def test_write_field_name_limit(tmpdir): """ filename = tmpdir.join("test.shp").strpath with shapefile.Writer(filename) as writer: - writer.field('a'*5, 'C') # many under length limit - writer.field('a'*9, 'C') # 1 under length limit - writer.field('a'*10, 'C') # at length limit - writer.field('a'*11, 'C') # 1 over length limit - writer.field('a'*20, 'C') # many over limit + writer.field("a" * 5, "C") # many under length limit + 
writer.field("a" * 9, "C") # 1 under length limit + writer.field("a" * 10, "C") # at length limit + writer.field("a" * 11, "C") # 1 over length limit + writer.field("a" * 20, "C") # many over limit with shapefile.Reader(filename) as reader: fields = reader.fields[1:] @@ -1143,7 +1397,7 @@ def test_write_shp_only(tmpdir): creates just a shp file. """ filename = tmpdir.join("test").strpath - with shapefile.Writer(shp=filename+'.shp') as writer: + with shapefile.Writer(shp=filename + ".shp") as writer: writer.point(1, 1) assert writer.shp and not writer.shx and not writer.dbf assert writer.shpNum == 1 @@ -1151,19 +1405,22 @@ def test_write_shp_only(tmpdir): assert writer.shp.closed is True # assert test.shp exists - assert os.path.exists(filename+'.shp') + assert os.path.exists(filename + ".shp") # test that can read shapes - with shapefile.Reader(shp=filename+'.shp') as reader: + with shapefile.Reader(shp=filename + ".shp") as reader: assert reader.shp and not reader.shx and not reader.dbf - assert (reader.numRecords, reader.numShapes) == (None, None) # numShapes is unknown in the absence of shx file + assert (reader.numRecords, reader.numShapes) == ( + None, + None, + ) # numShapes is unknown in the absence of shx file assert len(reader.shapes()) == 1 # assert test.shx does not exist - assert not os.path.exists(filename+'.shx') + assert not os.path.exists(filename + ".shx") # assert test.dbf does not exist - assert not os.path.exists(filename+'.dbf') + assert not os.path.exists(filename + ".dbf") def test_write_shp_shx_only(tmpdir): @@ -1173,7 +1430,7 @@ def test_write_shp_shx_only(tmpdir): creates just a shp and shx file. """ filename = tmpdir.join("test").strpath - with shapefile.Writer(shp=filename+'.shp', shx=filename+'.shx') as writer: + with shapefile.Writer(shp=filename + ".shp", shx=filename + ".shx") as writer: writer.point(1, 1) assert writer.shp and writer.shx and not writer.dbf assert writer.shpNum == 1 @@ -1181,21 +1438,21 @@ def test_write_shp_shx_only(tmpdir): assert writer.shp.closed is writer.shx.closed is True # assert test.shp exists - assert os.path.exists(filename+'.shp') + assert os.path.exists(filename + ".shp") # assert test.shx exists - assert os.path.exists(filename+'.shx') + assert os.path.exists(filename + ".shx") # test that can read shapes and offsets - with shapefile.Reader(shp=filename+'.shp', shx=filename+'.shx') as reader: + with shapefile.Reader(shp=filename + ".shp", shx=filename + ".shx") as reader: assert reader.shp and reader.shx and not reader.dbf assert (reader.numRecords, reader.numShapes) == (None, 1) - reader.shape(0) # trigger reading of shx offsets + reader.shape(0) # trigger reading of shx offsets assert len(reader._offsets) == 1 assert len(reader.shapes()) == 1 # assert test.dbf does not exist - assert not os.path.exists(filename+'.dbf') + assert not os.path.exists(filename + ".dbf") def test_write_shp_dbf_only(tmpdir): @@ -1205,9 +1462,9 @@ def test_write_shp_dbf_only(tmpdir): creates just a shp and dbf file. 
""" filename = tmpdir.join("test").strpath - with shapefile.Writer(shp=filename+'.shp', dbf=filename+'.dbf') as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + with shapefile.Writer(shp=filename + ".shp", dbf=filename + ".dbf") as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") writer.point(1, 1) assert writer.shp and not writer.shx and writer.dbf assert writer.shpNum == writer.recNum == 1 @@ -1215,20 +1472,23 @@ def test_write_shp_dbf_only(tmpdir): assert writer.shp.closed is writer.dbf.closed is True # assert test.shp exists - assert os.path.exists(filename+'.shp') + assert os.path.exists(filename + ".shp") # assert test.dbf exists - assert os.path.exists(filename+'.dbf') + assert os.path.exists(filename + ".dbf") # test that can read records and shapes - with shapefile.Reader(shp=filename+'.shp', dbf=filename+'.dbf') as reader: + with shapefile.Reader(shp=filename + ".shp", dbf=filename + ".dbf") as reader: assert reader.shp and not reader.shx and reader.dbf - assert (reader.numRecords, reader.numShapes) == (1, None) # numShapes is unknown in the absence of shx file + assert (reader.numRecords, reader.numShapes) == ( + 1, + None, + ) # numShapes is unknown in the absence of shx file assert len(reader.records()) == 1 assert len(reader.shapes()) == 1 # assert test.shx does not exist - assert not os.path.exists(filename+'.shx') + assert not os.path.exists(filename + ".shx") def test_write_dbf_only(tmpdir): @@ -1238,28 +1498,28 @@ def test_write_dbf_only(tmpdir): creates just a dbf file. """ filename = tmpdir.join("test").strpath - with shapefile.Writer(dbf=filename+'.dbf') as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + with shapefile.Writer(dbf=filename + ".dbf") as writer: + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") assert not writer.shp and not writer.shx and writer.dbf assert writer.recNum == 1 assert len(writer) == 1 assert writer.dbf.closed is True # assert test.dbf exists - assert os.path.exists(filename+'.dbf') + assert os.path.exists(filename + ".dbf") # test that can read records - with shapefile.Reader(dbf=filename+'.dbf') as reader: + with shapefile.Reader(dbf=filename + ".dbf") as reader: assert not writer.shp and not writer.shx and writer.dbf assert (reader.numRecords, reader.numShapes) == (1, None) assert len(reader.records()) == 1 # assert test.shp does not exist - assert not os.path.exists(filename+'.shp') + assert not os.path.exists(filename + ".shp") # assert test.shx does not exist - assert not os.path.exists(filename+'.shx') + assert not os.path.exists(filename + ".shx") def test_write_default_shp_shx_dbf(tmpdir): @@ -1270,8 +1530,8 @@ def test_write_default_shp_shx_dbf(tmpdir): """ filename = tmpdir.join("test").strpath with shapefile.Writer(filename) as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") writer.null() # assert shp, shx, dbf files exist @@ -1288,8 +1548,8 @@ def test_write_pathlike(tmpdir): filename = tmpdir.join("test") assert not isinstance(filename, str) with shapefile.Writer(filename) as writer: - writer.field('field1', 'C') - writer.record('value') + writer.field("field1", "C") + writer.record("value") writer.null() assert (filename + ".shp").ensure() assert (filename + ".shx").ensure() @@ 
-1300,12 +1560,12 @@ def test_write_filelike(tmpdir): """ Assert that file-like objects are written correctly. """ - shp = open(tmpdir.join("test.shp").strpath, mode='wb+') - shx = open(tmpdir.join("test.shx").strpath, mode='wb+') - dbf = open(tmpdir.join("test.dbf").strpath, mode='wb+') + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") with shapefile.Writer(shx=shx, dbf=dbf, shp=shp) as writer: - writer.field('field1', 'C') # required to create a valid dbf file - writer.record('value') + writer.field("field1", "C") # required to create a valid dbf file + writer.record("value") writer.null() # test that filelike objects were written correctly @@ -1320,9 +1580,9 @@ def test_write_close_path(tmpdir): closes the shp, shx, and dbf files on exit, if given paths. """ - sf = shapefile.Writer(tmpdir.join('test')) - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + sf = shapefile.Writer(tmpdir.join("test")) + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() sf.close() @@ -1331,7 +1591,7 @@ def test_write_close_path(tmpdir): assert sf.shx.closed is True # test that opens and reads correctly after - with shapefile.Reader(tmpdir.join('test')) as reader: + with shapefile.Reader(tmpdir.join("test")) as reader: assert len(reader) == 1 assert reader.shape(0).shapeType == shapefile.NULL @@ -1342,12 +1602,12 @@ def test_write_close_filelike(tmpdir): leaves the shp, shx, and dbf files open on exit, if given filelike objects. """ - shp = open(tmpdir.join("test.shp").strpath, mode='wb+') - shx = open(tmpdir.join("test.shx").strpath, mode='wb+') - dbf = open(tmpdir.join("test.dbf").strpath, mode='wb+') + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") sf = shapefile.Writer(shx=shx, dbf=dbf, shp=shp) - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() sf.close() @@ -1367,9 +1627,9 @@ def test_write_context_path(tmpdir): closes the shp, shx, and dbf files on exit, if given paths. """ - with shapefile.Writer(tmpdir.join('test')) as sf: - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + with shapefile.Writer(tmpdir.join("test")) as sf: + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() assert sf.shp.closed is True @@ -1377,7 +1637,7 @@ def test_write_context_path(tmpdir): assert sf.shx.closed is True # test that opens and reads correctly after - with shapefile.Reader(tmpdir.join('test')) as reader: + with shapefile.Reader(tmpdir.join("test")) as reader: assert len(reader) == 1 assert reader.shape(0).shapeType == shapefile.NULL @@ -1388,12 +1648,12 @@ def test_write_context_filelike(tmpdir): leaves the shp, shx, and dbf files open on exit, if given filelike objects. 
""" - shp = open(tmpdir.join("test.shp").strpath, mode='wb+') - shx = open(tmpdir.join("test.shx").strpath, mode='wb+') - dbf = open(tmpdir.join("test.dbf").strpath, mode='wb+') + shp = open(tmpdir.join("test.shp").strpath, mode="wb+") + shx = open(tmpdir.join("test.shx").strpath, mode="wb+") + dbf = open(tmpdir.join("test.dbf").strpath, mode="wb+") with shapefile.Writer(shx=shx, dbf=dbf, shp=shp) as sf: - sf.field('field1', 'C') # required to create a valid dbf file - sf.record('value') + sf.field("field1", "C") # required to create a valid dbf file + sf.record("value") sf.null() assert sf.shp.closed is False @@ -1415,7 +1675,7 @@ def test_write_shapefile_extension_ignored(tmpdir): ext = ".abc" filename = tmpdir.join(base + ext).strpath with shapefile.Writer(filename) as writer: - writer.field('field1', 'C') # required to create a valid dbf file + writer.field("field1", "C") # required to create a valid dbf file # assert shp, shx, dbf files exist basepath = tmpdir.join(base).strpath @@ -1436,12 +1696,12 @@ def test_write_record(tmpdir): with shapefile.Writer(filename) as writer: writer.autoBalance = True - writer.field('one', 'C') - writer.field('two', 'C') - writer.field('three', 'C') - writer.field('four', 'C') + writer.field("one", "C") + writer.field("two", "C") + writer.field("three", "C") + writer.field("four", "C") - values = ['one','two','three','four'] + values = ["one", "two", "three", "four"] writer.record(*values) writer.record(*values) @@ -1463,12 +1723,12 @@ def test_write_partial_record(tmpdir): with shapefile.Writer(filename) as writer: writer.autoBalance = True - writer.field('one', 'C') - writer.field('two', 'C') - writer.field('three', 'C') - writer.field('four', 'C') + writer.field("one", "C") + writer.field("two", "C") + writer.field("three", "C") + writer.field("four", "C") - values = ['one','two'] + values = ["one", "two"] writer.record(*values) writer.record(*values) @@ -1478,7 +1738,7 @@ def test_write_partial_record(tmpdir): with shapefile.Reader(filename) as reader: expected = list(values) - expected.extend(['','']) + expected.extend(["", ""]) for record in reader.iterRecords(): assert record == expected @@ -1491,13 +1751,13 @@ def test_write_geojson(tmpdir): """ filename = tmpdir.join("test").strpath with shapefile.Writer(filename) as w: - w.field('TEXT', 'C') - w.field('NUMBER', 'N') - w.field('DATE', 'D') - w.record('text', 123, datetime.date(1898,1,30)) - w.record('text', 123, [1998,1,30]) - w.record('text', 123, '19980130') - w.record('text', 123, '-9999999') # faulty date + w.field("TEXT", "C") + w.field("NUMBER", "N") + w.field("DATE", "D") + w.record("text", 123, datetime.date(1898, 1, 30)) + w.record("text", 123, [1998, 1, 30]) + w.record("text", 123, "19980130") + w.record("text", 123, "-9999999") # faulty date w.record(None, None, None) w.null() w.null() @@ -1512,7 +1772,9 @@ def test_write_geojson(tmpdir): assert json.dumps(r.__geo_interface__) -shape_types = [k for k in shapefile.SHAPETYPE_LOOKUP.keys() if k != 31] # exclude multipatch +shape_types = [ + k for k in shapefile.SHAPETYPE_LOOKUP.keys() if k != 31 +] # exclude multipatch @pytest.mark.parametrize("shape_type", shape_types) @@ -1522,7 +1784,7 @@ def test_write_empty_shapefile(tmpdir, shape_type): """ filename = tmpdir.join("test").strpath with shapefile.Writer(filename, shapeType=shape_type) as w: - w.field('field1', 'C') # required to create a valid dbf file + w.field("field1", "C") # required to create a valid dbf file with shapefile.Reader(filename) as r: # test correct shape type From 
2601071cc5da511132f46b55afadcaa77fe960a9 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:50:25 +0100 Subject: [PATCH 081/115] Run ruff --fix --unsafe-fixes --- shapefile.py | 4 ++-- test_shapefile.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index 22c217e8..7b4f7ba2 100644 --- a/shapefile.py +++ b/shapefile.py @@ -543,7 +543,7 @@ def __geo_interface__(self): ps = None coordinates = [] for part in self.parts: - if ps == None: + if ps is None: ps = part continue else: @@ -1474,7 +1474,7 @@ def __shapeIndex(self, i=None): in the .shx index file.""" shx = self.shx # Return None if no shx or no index requested - if not shx or i == None: + if not shx or i is None: return None # At this point, we know the shx file exists if not self._offsets: diff --git a/test_shapefile.py b/test_shapefile.py index ae173ce1..6e1ebd09 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -1068,7 +1068,7 @@ def test_reader_numshapes(): basename = "shapefiles/blockgroups" with shapefile.Reader(basename) as sf: # numShapes should be set during loading - assert sf.numShapes != None + assert sf.numShapes is not None # numShapes should equal the number of shapes assert sf.numShapes == len(sf.shapes()) @@ -1084,7 +1084,7 @@ def test_reader_numshapes_no_shx(): dbf = open(basename + ".dbf", "rb") with shapefile.Reader(shp=shp, dbf=dbf) as sf: # numShapes should be unknown due to missing shx file - assert sf.numShapes == None + assert sf.numShapes is None # numShapes should be set after reading all the shapes shapes = sf.shapes() assert sf.numShapes == len(shapes) From 9801c4108f9ba86a598d1fcc029ac8b93cac4d7a Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:13:16 +0100 Subject: [PATCH 082/115] Restore Python <=3.9 style nested with statements --- test_shapefile.py | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/test_shapefile.py b/test_shapefile.py index 6e1ebd09..89ce46d1 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -1000,30 +1000,28 @@ def test_shape_oid_no_shx(): basename = "shapefiles/blockgroups" shp = open(basename + ".shp", "rb") dbf = open(basename + ".dbf", "rb") - with ( - shapefile.Reader(shp=shp, dbf=dbf) as sf, - shapefile.Reader(basename) as sf_expected, - ): - for i in range(len(sf)): - shape = sf.shape(i) - assert shape.oid == i - shape_expected = sf_expected.shape(i) - assert shape.__geo_interface__ == shape_expected.__geo_interface__ - - for i, shape in enumerate(sf.shapes()): - assert shape.oid == i - shape_expected = sf_expected.shape(i) - assert shape.__geo_interface__ == shape_expected.__geo_interface__ - - for i, shape in enumerate(sf.iterShapes()): - assert shape.oid == i - shape_expected = sf_expected.shape(i) - assert shape.__geo_interface__ == shape_expected.__geo_interface__ - - for i, shaperec in enumerate(sf.iterShapeRecords()): - assert shaperec.shape.oid == i - shape_expected = sf_expected.shape(i) - assert shaperec.shape.__geo_interface__ == shape_expected.__geo_interface__ + with shapefile.Reader(shp=shp, dbf=dbf) as sf: + with shapefile.Reader(basename) as sf_expected: + for i in range(len(sf)): + shape = sf.shape(i) + assert shape.oid == i + shape_expected = sf_expected.shape(i) + assert shape.__geo_interface__ == shape_expected.__geo_interface__ + + for i, shape in enumerate(sf.shapes()): + assert shape.oid == 
i + shape_expected = sf_expected.shape(i) + assert shape.__geo_interface__ == shape_expected.__geo_interface__ + + for i, shape in enumerate(sf.iterShapes()): + assert shape.oid == i + shape_expected = sf_expected.shape(i) + assert shape.__geo_interface__ == shape_expected.__geo_interface__ + + for i, shaperec in enumerate(sf.iterShapeRecords()): + assert shaperec.shape.oid == i + shape_expected = sf_expected.shape(i) + assert shaperec.shape.__geo_interface__ == shape_expected.__geo_interface__ def test_reader_offsets(): From 9953965897fc826d4b2ab099aad5b030b0e7de53 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:21:37 +0100 Subject: [PATCH 083/115] Remove trailing comma. after unpacked list args in func call (*args). Syntax error in Python 2. --- shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index 7b4f7ba2..f52a065d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2338,7 +2338,7 @@ def __shpRecord(self, s): f.write( pack( "<%sd" % len(s.m), - *[m if m is not None else NODATA for m in s.m], + *[m if m is not None else NODATA for m in s.m] ) ) else: From 742bfc7274b3deb68e32157e76232f1edb1222b3 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:25:37 +0100 Subject: [PATCH 084/115] Remove Ruff-format pre-commit hook --- .pre-commit-config.yaml | 5 +---- shapefile.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f065f594..18f0b5dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,7 +9,4 @@ repos: hooks: - id: isort name: isort (python) -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.4 - hooks: - - id: ruff-format + diff --git a/shapefile.py b/shapefile.py index f52a065d..7b4f7ba2 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2338,7 +2338,7 @@ def __shpRecord(self, s): f.write( pack( "<%sd" % len(s.m), - *[m if m is not None else NODATA for m in s.m] + *[m if m is not None else NODATA for m in s.m], ) ) else: From 27b87df6cfeec9672d859124779ab8ef18f9807e Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:29:21 +0100 Subject: [PATCH 085/115] Remove trailing comma again --- shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shapefile.py b/shapefile.py index 7b4f7ba2..f52a065d 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2338,7 +2338,7 @@ def __shpRecord(self, s): f.write( pack( "<%sd" % len(s.m), - *[m if m is not None else NODATA for m in s.m], + *[m if m is not None else NODATA for m in s.m] ) ) else: From eee61809b874bbcef33380ba4186d6ead43c8d44 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:40:50 +0100 Subject: [PATCH 086/115] Turn off formatter at the troublesome trailing comma --- .pre-commit-config.yaml | 5 ++++- pyproject.toml | 2 +- shapefile.py | 2 ++ test_shapefile.py | 4 +++- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 18f0b5dd..f065f594 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,4 +9,7 @@ repos: hooks: - id: isort name: isort (python) - +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.4 + hooks: + - id: ruff-format diff --git a/pyproject.toml b/pyproject.toml index 
8cdddf81..df8e737f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ line-length = 88 indent-width = 4 # Assume Python 3.9 -target-version = "py39" +target-version = "py37" [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. diff --git a/shapefile.py b/shapefile.py index f52a065d..12af74d7 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2335,12 +2335,14 @@ def __shpRecord(self, s): try: if hasattr(s, "m"): # if m values are stored in attribute + # fmt: off f.write( pack( "<%sd" % len(s.m), *[m if m is not None else NODATA for m in s.m] ) ) + # fmt: on else: # if m values are stored as 3rd/4th dimension # 0-index position of m value is 3 if z type (x,y,z,m), or 2 if m type (x,y,m) diff --git a/test_shapefile.py b/test_shapefile.py index 89ce46d1..08561c6a 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -1021,7 +1021,9 @@ def test_shape_oid_no_shx(): for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.shape.oid == i shape_expected = sf_expected.shape(i) - assert shaperec.shape.__geo_interface__ == shape_expected.__geo_interface__ + assert ( + shaperec.shape.__geo_interface__ == shape_expected.__geo_interface__ + ) def test_reader_offsets(): From e41b03ca574ed79c58bad9dc9e72d84dc98af19b Mon Sep 17 00:00:00 2001 From: Lionel GUEZ Date: Fri, 11 Oct 2024 20:19:31 +0200 Subject: [PATCH 087/115] Add option `my_range` to method iterRecords Using iterRecords with a range option should be faster than calling record within a loop, since we avoid the multiple calls to seek. --- shapefile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/shapefile.py b/shapefile.py index 12af74d7..07bc3c03 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1809,7 +1809,7 @@ def records(self, fields=None): records.append(r) return records - def iterRecords(self, fields=None): + def iterRecords(self, fields=None, my_range=None): """Returns a generator of records in a dbf file. Useful for large shapefiles or dbf files. To only read some of the fields, specify the 'fields' arg as a @@ -1820,7 +1820,9 @@ def iterRecords(self, fields=None): f = self.__getFileObj(self.dbf) f.seek(self.__dbfHdrLength) fieldTuples, recLookup, recStruct = self.__recordFields(fields) - for i in xrange(self.numRecords): + if my_range is None: + my_range = xrange(self.numRecords) + for i in my_range: r = self.__record( oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) From 4efef9fdb5a1d188a33316793cb964d33daa63df Mon Sep 17 00:00:00 2001 From: Lionel GUEZ Date: Tue, 15 Oct 2024 06:57:12 +0200 Subject: [PATCH 088/115] Revert "Add option `my_range` to method iterRecords" This reverts commit e41b03ca. JamesParrott pointed that I did not understand the way `__record` works: __record does not use oid to find the correct record, it just assumes it is the correct oid for the current position. --- shapefile.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/shapefile.py b/shapefile.py index 07bc3c03..12af74d7 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1809,7 +1809,7 @@ def records(self, fields=None): records.append(r) return records - def iterRecords(self, fields=None, my_range=None): + def iterRecords(self, fields=None): """Returns a generator of records in a dbf file. Useful for large shapefiles or dbf files. 
To only read some of the fields, specify the 'fields' arg as a @@ -1820,9 +1820,7 @@ def iterRecords(self, fields=None, my_range=None): f = self.__getFileObj(self.dbf) f.seek(self.__dbfHdrLength) fieldTuples, recLookup, recStruct = self.__recordFields(fields) - if my_range is None: - my_range = xrange(self.numRecords) - for i in my_range: + for i in xrange(self.numRecords): r = self.__record( oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct ) From 811d32909e0b4889efa2357dd32682e324222225 Mon Sep 17 00:00:00 2001 From: Lionel GUEZ Date: Tue, 15 Oct 2024 08:02:03 +0200 Subject: [PATCH 089/115] Add method `iterRecords_range` Using the method `iterRecords_range` should be somewhat faster than calling the method `record` within a loop, since we avoid the repeated calls to seek inside `record`. --- shapefile.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/shapefile.py b/shapefile.py index 12af74d7..906cdda7 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1827,6 +1827,32 @@ def iterRecords(self, fields=None): if r: yield r + def iterRecords_range(self, start, stop, fields=None): + """Returns a generator of records in a dbf file, for a range + of oid. Useful for large shapefiles or dbf files. To only + read some of the fields, specify the 'fields' arg as a list of + one or more fieldnames. + + """ + if self.numRecords is None: + self.__dbfHeader() + f = self.__getFileObj(self.dbf) + start = self.__restrictIndex(start) + if abs(stop) > self.numRecords: + raise IndexError("Record index out of range.") + if stop < 0: + stop = range(self.numRecords)[stop] + recSize = self.__recordLength + f.seek(0) + f.seek(self.__dbfHdrLength + (start * recSize)) + fieldTuples, recLookup, recStruct = self.__recordFields(fields) + for i in xrange(start, stop): + r = self.__record( + oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct + ) + if r: + yield r + def shapeRecord(self, i=0, fields=None, bbox=None): """Returns a combination geometry and attribute record for the supplied record index. From a6c739a81ed7cf9a738ad94a0554aaa7d58e4cad Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 20:29:45 +0100 Subject: [PATCH 090/115] Combine the methods, giving start and stop default values. Remove f.seek(0) before f.seek(..) --- shapefile.py | 43 ++++++++++++++++++------------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/shapefile.py b/shapefile.py index 906cdda7..0f82d803 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1325,7 +1325,9 @@ def __restrictIndex(self, i): if self.numRecords: rmax = self.numRecords - 1 if abs(i) > rmax: - raise IndexError("Shape or Record index out of range.") + raise IndexError( + "Shape or Record index: %s out of range. Max index: %s" % (i, rmax) + ) if i < 0: i = range(self.numRecords)[i] return i @@ -1809,41 +1811,32 @@ def records(self, fields=None): records.append(r) return records - def iterRecords(self, fields=None): + def iterRecords(self, fields=None, start=0, stop=None): """Returns a generator of records in a dbf file. Useful for large shapefiles or dbf files. To only read some of the fields, specify the 'fields' arg as a list of one or more fieldnames. 
- """ - if self.numRecords is None: - self.__dbfHeader() - f = self.__getFileObj(self.dbf) - f.seek(self.__dbfHdrLength) - fieldTuples, recLookup, recStruct = self.__recordFields(fields) - for i in xrange(self.numRecords): - r = self.__record( - oid=i, fieldTuples=fieldTuples, recLookup=recLookup, recStruct=recStruct - ) - if r: - yield r - - def iterRecords_range(self, start, stop, fields=None): - """Returns a generator of records in a dbf file, for a range - of oid. Useful for large shapefiles or dbf files. To only - read some of the fields, specify the 'fields' arg as a list of - one or more fieldnames. - + By default yields all records. Otherwise, specify start + (default: 0) or stop (default: number_of_records) + to only yield record numbers i, where + start <= i < stop, (or + start <= i < number_of_records + stop + if stop < 0). """ if self.numRecords is None: self.__dbfHeader() f = self.__getFileObj(self.dbf) start = self.__restrictIndex(start) - if abs(stop) > self.numRecords: - raise IndexError("Record index out of range.") - if stop < 0: + if stop is None: + stop = self.numRecords + elif abs(stop) > self.numRecords: + raise IndexError( + "abs(stop): %s exceeds number of records: %s." + % (abs(stop), self.numRecords) + ) + elif stop < 0: stop = range(self.numRecords)[stop] recSize = self.__recordLength - f.seek(0) f.seek(self.__dbfHdrLength + (start * recSize)) fieldTuples, recLookup, recStruct = self.__recordFields(fields) for i in xrange(start, stop): From 0fdcab16fa3e4a29ca34cb201fe7cc9dacc99638 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:19:15 +0100 Subject: [PATCH 091/115] Add _Record.__eq__ --- shapefile.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/shapefile.py b/shapefile.py index 0f82d803..ce335b59 100644 --- a/shapefile.py +++ b/shapefile.py @@ -854,6 +854,17 @@ def __dir__(self): ) # plus field names (random order if Python version < 3.6) return default + fnames + def __eq__(self, other): + return (isinstance(other, self.__class__) and + self.__field_positions == other.__field_positions and + self.oid == other.oid and + len(self) == len(other) and + all(val_self == val_other + for val_self, val_other in izip(self, other) + ) + ) + + class ShapeRecord(object): """A ShapeRecord object containing a shape along with its attributes. From 74cf6dd96ef7c7d1fb4b64b14318eef03342ddc4 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:20:12 +0100 Subject: [PATCH 092/115] Let Ruff adjust code style (lead with ands, instead of trailing). 
--- shapefile.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/shapefile.py b/shapefile.py index ce335b59..04c7613f 100644 --- a/shapefile.py +++ b/shapefile.py @@ -855,15 +855,13 @@ def __dir__(self): return default + fnames def __eq__(self, other): - return (isinstance(other, self.__class__) and - self.__field_positions == other.__field_positions and - self.oid == other.oid and - len(self) == len(other) and - all(val_self == val_other - for val_self, val_other in izip(self, other) - ) - ) - + return ( + isinstance(other, self.__class__) + and self.__field_positions == other.__field_positions + and self.oid == other.oid + and len(self) == len(other) + and all(val_self == val_other for val_self, val_other in izip(self, other)) + ) class ShapeRecord(object): From cc82ec82c4c14dda4b6bb802b46633504dc3ed15 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:22:57 +0100 Subject: [PATCH 093/115] Add test for iterRecords new start and stop args; ensure same Records returned as from Reader.record --- test_shapefile.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/test_shapefile.py b/test_shapefile.py index 08561c6a..619a45e5 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -3,6 +3,7 @@ """ import datetime +import itertools import json import os.path @@ -968,10 +969,33 @@ def test_record_oid(): for i, record in enumerate(sf.iterRecords()): assert record.oid == i + for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.record.oid == i +def test_iterRecords_start_stop(): + """ + Assert that Reader.iterRecords(start, stop) + returns the correct records, as if searched for + by Reader.. + """ + + + with shapefile.Reader("shapefiles/blockgroups") as sf: + + N = len(sf) + + # Arbitrary selection of start values + for start in [0, 1, 2, 3, 5, 11, 17, 33, 51, 103, 170, 234, 435, 543, N-3, N-2, N-1]: + for stop in range(start, len(sf)): + # test negative indexing from end, as well as + # positive values of stop, and its default + for stop_arg in (stop, stop - len(sf), None): + for record in sf.iterRecords(start = start, stop = stop): + assert record == sf.record(record.oid) + + def test_shape_oid(): """ Assert that the shape's oid attribute returns From 3f5641b1efa77e54302141f719777338e6ea45c8 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:23:27 +0100 Subject: [PATCH 094/115] Let Ruff reformat the list. --- test_shapefile.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/test_shapefile.py b/test_shapefile.py index 619a45e5..fa7e9469 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -969,7 +969,6 @@ def test_record_oid(): for i, record in enumerate(sf.iterRecords()): assert record.oid == i - for i, shaperec in enumerate(sf.iterShapeRecords()): assert shaperec.record.oid == i @@ -981,18 +980,34 @@ def test_iterRecords_start_stop(): by Reader.. 
""" - with shapefile.Reader("shapefiles/blockgroups") as sf: - N = len(sf) # Arbitrary selection of start values - for start in [0, 1, 2, 3, 5, 11, 17, 33, 51, 103, 170, 234, 435, 543, N-3, N-2, N-1]: + for start in [ + 0, + 1, + 2, + 3, + 5, + 11, + 17, + 33, + 51, + 103, + 170, + 234, + 435, + 543, + N - 3, + N - 2, + N - 1, + ]: for stop in range(start, len(sf)): # test negative indexing from end, as well as # positive values of stop, and its default for stop_arg in (stop, stop - len(sf), None): - for record in sf.iterRecords(start = start, stop = stop): + for record in sf.iterRecords(start=start, stop=stop): assert record == sf.record(record.oid) From 25a319932942c0a3226fc2e785760b08bb9a934c Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:29:10 +0100 Subject: [PATCH 095/115] Test default values. Pass in correct stop value. --- test_shapefile.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/test_shapefile.py b/test_shapefile.py index fa7e9469..b9b6b095 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -983,8 +983,9 @@ def test_iterRecords_start_stop(): with shapefile.Reader("shapefiles/blockgroups") as sf: N = len(sf) - # Arbitrary selection of start values - for start in [ + # Arbitrary selection of record indices + # (there are 663 records in blockgroups.dbf). + for i in [ 0, 1, 2, @@ -1003,11 +1004,17 @@ def test_iterRecords_start_stop(): N - 2, N - 1, ]: - for stop in range(start, len(sf)): + for record in sf.iterRecords(start=i): + assert record == sf.record(record.oid) + + for record in sf.iterRecords(stop=i): + assert record == sf.record(record.oid) + + for stop in range(i, len(sf)): # test negative indexing from end, as well as # positive values of stop, and its default - for stop_arg in (stop, stop - len(sf), None): - for record in sf.iterRecords(start=start, stop=stop): + for stop_arg in (stop, stop - len(sf)): + for record in sf.iterRecords(start=i, stop=stop_arg): assert record == sf.record(record.oid) From 4cead935eedd2980ba7392c012bbe063e3a2c9d7 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:34:37 +0100 Subject: [PATCH 096/115] Relax _Record.__eq__ - don't require equal oids (PyShp implementation detail, not a shapefile record property) --- shapefile.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/shapefile.py b/shapefile.py index 04c7613f..54499b2c 100644 --- a/shapefile.py +++ b/shapefile.py @@ -855,14 +855,10 @@ def __dir__(self): return default + fnames def __eq__(self, other): - return ( - isinstance(other, self.__class__) - and self.__field_positions == other.__field_positions - and self.oid == other.oid - and len(self) == len(other) - and all(val_self == val_other for val_self, val_other in izip(self, other)) - ) - + if isinstance(other, self.__class__): + if self.__field_positions != other.__field_positions: + return False + return list.__eq__(self, other) class ShapeRecord(object): """A ShapeRecord object containing a shape along with its attributes. From d4b3ed1fb8460b3dd677ddce372f9da7117d2150 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:35:34 +0100 Subject: [PATCH 097/115] Add a blank line to appease Ruff. 
--- shapefile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/shapefile.py b/shapefile.py index 54499b2c..2b22ba91 100644 --- a/shapefile.py +++ b/shapefile.py @@ -860,6 +860,7 @@ def __eq__(self, other): return False return list.__eq__(self, other) + class ShapeRecord(object): """A ShapeRecord object containing a shape along with its attributes. Provides the GeoJSON __geo_interface__ to return a Feature dictionary.""" From 6584f912968ce4e95f8a388291bd69245104e050 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 21:43:28 +0100 Subject: [PATCH 098/115] Remove unused import --- test_shapefile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test_shapefile.py b/test_shapefile.py index b9b6b095..a0173c29 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -3,7 +3,6 @@ """ import datetime -import itertools import json import os.path From 7cb08892eb0ab204219de51c84a6d7634d3ec8de Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Thu, 17 Oct 2024 22:16:31 +0100 Subject: [PATCH 099/115] Edit docstring --- test_shapefile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_shapefile.py b/test_shapefile.py index a0173c29..7984e91f 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -976,7 +976,7 @@ def test_iterRecords_start_stop(): """ Assert that Reader.iterRecords(start, stop) returns the correct records, as if searched for - by Reader.. + by index with Reader.record """ with shapefile.Reader("shapefiles/blockgroups") as sf: From af94c7d534a432d76f58b346ebd97fd73e1b98fa Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:03:27 +0100 Subject: [PATCH 100/115] Update README.md --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index 15f9f9b4..75be6f64 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,14 @@ part of your geospatial project. # Version Changes +## 2.3.x + +### New Features: +- Reader.iterRecords now allows start and stop to be specified, to lookup smaller ranges of records. + +### Development: +- Code quality tools run on PyShp + ## 2.3.1 ### Bug fixes: @@ -1467,6 +1475,7 @@ Karim Bahgat karanrn Kurt Schwehr Kyle Kelley +Lionel Guez Louis Tiao Marcin Cuprjak mcuprjak From 763bd3340c25ad04f3d4fb71fbade379843bd670 Mon Sep 17 00:00:00 2001 From: JamesParrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:11:39 +0100 Subject: [PATCH 101/115] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 75be6f64..2a211cc3 100644 --- a/README.md +++ b/README.md @@ -99,6 +99,7 @@ part of your geospatial project. ### New Features: - Reader.iterRecords now allows start and stop to be specified, to lookup smaller ranges of records. +- Equality comparisons between Records now also require the fields to be the same (and in the same order). 
### Development: - Code quality tools run on PyShp From 62f3293e068e7e1e0341d186879bd3d864a351a2 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 18 Jul 2025 17:15:32 +0100 Subject: [PATCH 102/115] Devx: add .venv and venv to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index f6d63328..5b39efe1 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,5 @@ dist/ *.py[cod] .vscode .dmypy.json +.venv +venv From b43c820d0330d4a02e0f7f0806f47bd8588812d7 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 18 Jul 2025 21:54:25 +0100 Subject: [PATCH 103/115] Test on 3.13 latest and move 3.13 into main group. Drop ubuntu-latest, as right now it duplicates 24.04 --- .github/workflows/run_tests_hooks_and_tools.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml index 4c393a33..94995ebe 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -62,19 +62,15 @@ jobs: "3.10", "3.11", "3.12", - "3.13.0-rc.2", + "3.13", + "3.14.0-beta.4", ] os: [ "macos-latest", - "ubuntu-latest", "ubuntu-24.04", "windows-latest", ] - include: - - os: ubuntu-24.04 - python-version: "3.14.0-alpha.0" - - os: ubuntu-22.04 - python-version: "3.14.0-alpha.0" + runs-on: ${{ matrix.os }} steps: From b327f6b8ba2c52384cd16c7f6202f66e4b107882 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 18 Jul 2025 22:02:53 +0100 Subject: [PATCH 104/115] Qual. Replace 'if "ext" in dict_.keys()' with 'if "str" in dict_' --- shapefile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shapefile.py b/shapefile.py index 2b22ba91..3a6bbe7e 100644 --- a/shapefile.py +++ b/shapefile.py @@ -1099,7 +1099,7 @@ def __init__(self, *args, **kwargs): return # Otherwise, load from separate shp/shx/dbf args (must be path or file-like) - if "shp" in kwargs.keys(): + if "shp" in kwargs: if hasattr(kwargs["shp"], "read"): self.shp = kwargs["shp"] # Copy if required @@ -1111,7 +1111,7 @@ def __init__(self, *args, **kwargs): (baseName, ext) = os.path.splitext(kwargs["shp"]) self.load_shp(baseName) - if "shx" in kwargs.keys(): + if "shx" in kwargs: if hasattr(kwargs["shx"], "read"): self.shx = kwargs["shx"] # Copy if required @@ -1123,7 +1123,7 @@ def __init__(self, *args, **kwargs): (baseName, ext) = os.path.splitext(kwargs["shx"]) self.load_shx(baseName) - if "dbf" in kwargs.keys(): + if "dbf" in kwargs: if hasattr(kwargs["dbf"], "read"): self.dbf = kwargs["dbf"] # Copy if required From 7a4b00f24162785f2906fe760b11096923462e81 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 18 Jul 2025 22:15:55 +0100 Subject: [PATCH 105/115] Replace another test of `in dict_.keys()` with a defaultdict, and simplify nearby code. 
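
The idea, sketched independently of the diff below. holes, exteriors,
ring_bbox and bbox_contains are the existing names inside shapefile.py's
organize_polygon_rings, so this fragment only runs in that context:

    import collections

    # Before: every hole index is pre-seeded with an empty list,
    # and .keys() is then iterated over.
    hole_exteriors = dict((hole_i, []) for hole_i in range(len(holes)))

    # After: defaultdict(list) creates each list lazily on first append,
    # so neither the seeding pass nor the .keys() call is needed.
    hole_exteriors = collections.defaultdict(list)
    for hole_i, hole in enumerate(holes):
        hole_bbox = ring_bbox(hole)
        for ext_i, ring in enumerate(exteriors):
            if bbox_contains(ring_bbox(ring), hole_bbox):
                hole_exteriors[hole_i].append(ext_i)

One subtle difference: with the defaultdict, a hole that matches no exterior
never gets a key at all, whereas the seeded dict kept an empty entry for it.
(The change is in any case reverted in the following commit.)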
--- shapefile.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/shapefile.py b/shapefile.py index 3a6bbe7e..271129eb 100644 --- a/shapefile.py +++ b/shapefile.py @@ -9,6 +9,7 @@ __version__ = "2.3.1" import array +import collections import io import logging import os @@ -393,12 +394,11 @@ def organize_polygon_rings(rings, return_errors=None): return polys # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) - exterior_bboxes = [ring_bbox(ring) for ring in exteriors] - for hole_i in hole_exteriors.keys(): + hole_exteriors = collections.defaultdict(list) + for hole_i in xrange(len(holes)): hole_bbox = ring_bbox(holes[hole_i]) - for ext_i, ext_bbox in enumerate(exterior_bboxes): - if bbox_contains(ext_bbox, hole_bbox): + for ext_i, ring in enumerate(exteriors): + if bbox_contains(ring_bbox(ring), hole_bbox): hole_exteriors[hole_i].append(ext_i) # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test From a9caa32a18e872ad0bf44641dfa4099cb1dcd4eb Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 18 Jul 2025 22:37:31 +0100 Subject: [PATCH 106/115] Revert "Replace another test of `in dict_.keys()` with a defaultdict, and simplify nearby code." This reverts commit 7a4b00f24162785f2906fe760b11096923462e81. --- shapefile.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/shapefile.py b/shapefile.py index 271129eb..3a6bbe7e 100644 --- a/shapefile.py +++ b/shapefile.py @@ -9,7 +9,6 @@ __version__ = "2.3.1" import array -import collections import io import logging import os @@ -394,11 +393,12 @@ def organize_polygon_rings(rings, return_errors=None): return polys # first determine each hole's candidate exteriors based on simple bbox contains test - hole_exteriors = collections.defaultdict(list) - for hole_i in xrange(len(holes)): + hole_exteriors = dict([(hole_i, []) for hole_i in xrange(len(holes))]) + exterior_bboxes = [ring_bbox(ring) for ring in exteriors] + for hole_i in hole_exteriors.keys(): hole_bbox = ring_bbox(holes[hole_i]) - for ext_i, ring in enumerate(exteriors): - if bbox_contains(ring_bbox(ring), hole_bbox): + for ext_i, ext_bbox in enumerate(exterior_bboxes): + if bbox_contains(ext_bbox, hole_bbox): hole_exteriors[hole_i].append(ext_i) # then, for holes with still more than one possible exterior, do more detailed hole-in-ring test From 466920f27a5d31482dabb89a8df191aa5d6e8e1c Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Fri, 18 Jul 2025 23:07:29 +0100 Subject: [PATCH 107/115] Make flakey network dependent tests optional This reverts commit 2dc17a78c4eb90196b36169e897ec36f55a6223e. Run on this branch Instead of plain pytest, run pytest -m "not network" Filter out pytest tests and doctests requiring internet downloads Run Ruff format Make skipping network tests optional --- .github/actions/test/action.yml | 9 ++++- shapefile.py | 65 ++++++++++++++++++++++++++------- 2 files changed, 58 insertions(+), 16 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 86ec93f3..10206063 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -4,6 +4,11 @@ name: description: Run pytest, and run the doctest runner (shapefile.py as a script). 
+inputs: + extra_args: + type: string + default: '-m "not network"' + runs: using: "composite" steps: @@ -13,7 +18,7 @@ runs: - name: Doctests shell: bash - run: python shapefile.py + run: python shapefile.py ${{ inputs.extra_args }} - name: Install test dependencies. shell: bash @@ -24,7 +29,7 @@ runs: - name: Pytest shell: bash run: | - pytest + pytest ${{ inputs.extra_args }} - name: Show versions for logs. shell: bash diff --git a/shapefile.py b/shapefile.py index 3a6bbe7e..be3650db 100644 --- a/shapefile.py +++ b/shapefile.py @@ -2776,13 +2776,56 @@ def field(self, name, fieldType="C", size="50", decimal=0): # Begin Testing -def test(**kwargs): +def _get_doctests(): import doctest doctest.NORMALIZE_WHITESPACE = 1 - verbosity = kwargs.get("verbose", 0) + + # run tests + with open("README.md", "rb") as fobj: + tests = doctest.DocTestParser().get_doctest( + string=fobj.read().decode("utf8").replace("\r\n", "\n"), + globs={}, + name="README", + filename="README.md", + lineno=0, + ) + + return tests + + +def _get_no_network_doctests(examples): + globals_from_network_doctests = set() + for example in examples: + if 'sf = shapefile.Reader("https://' in example.source: + globals_from_network_doctests.add("sf") + continue + lhs = example.source.partition("=")[0] + + for target in lhs.split(","): + target = target.strip() + if target in globals_from_network_doctests: + globals_from_network_doctests.remove(target) + + if globals_from_network_doctests: + continue + + yield example + + +def _test(verbosity=0): if verbosity == 0: - print("Running doctests...") + print("Getting doctests...") + tests = _get_doctests() + + if len(sys.argv) >= 3 and sys.argv[1:3] == ["-m", "not network"]: + if verbosity == 0: + print("Removing doctests requiring internet access...") + tests.examples = list(_get_no_network_doctests(tests.examples)) + + import doctest + + doctest.NORMALIZE_WHITESPACE = 1 # ignore py2-3 unicode differences import re @@ -2798,17 +2841,11 @@ def check_output(self, want, got, optionflags): def summarize(self): doctest.OutputChecker.summarize(True) - # run tests runner = doctest.DocTestRunner(checker=Py23DocChecker(), verbose=verbosity) - with open("README.md", "rb") as fobj: - test = doctest.DocTestParser().get_doctest( - string=fobj.read().decode("utf8").replace("\r\n", "\n"), - globs={}, - name="README", - filename="README.md", - lineno=0, - ) - failure_count, test_count = runner.run(test) + + if verbosity == 0: + print("Running %s doctests..." % len(tests.examples)) + failure_count, test_count = runner.run(tests) # print results if verbosity: @@ -2827,5 +2864,5 @@ def summarize(self): Doctests are contained in the file 'README.md', and are tested using the built-in testing libraries. """ - failure_count = test() + failure_count = _test() sys.exit(failure_count) From 19a9238d530cfccd5cd3f84fbaa8e0254413e9a0 Mon Sep 17 00:00:00 2001 From: James Parrott <80779630+JamesParrott@users.noreply.github.com> Date: Sat, 19 Jul 2025 12:09:50 +0100 Subject: [PATCH 108/115] Test Network tests on localhost Run on this branch Run Network tests in container jobs Remove type field from Action input Add description and required fields to Action input. Add option to Clone repo of copies of files to serve locally in mock remote url tests Specify shell in action job step Use actions/checkout instead of git (unavailable in Python slim docker images) Checkout artefacts repo to .. 
Add pyshp_repo_directory input Serve artefacts on localhost:8000 Specify shell: bash in Github action Don't curl from localhost - not available in Python slim images (neither is wget) TRy requesting on localhost in Python non-slim images Correct path to custom test file Test local server Sleep for twice as long after starting simple Python server Don't output PyTest version Swap out remote URLs with stripped path localhost version Rename input variable Import re where needed and reformat Reformat Change env var to be yes / no, not True / False Reformat Pass list to urlunparse on Python 2 Specify port 8000 Use double quotes Separate network tests and non-network tests into different steps Trim whitespace Try Network tests on all platforms Print simplified localhost urls during Pytest network tests Reorder pre-commit hooks and add blank line Try curling from simplified url on Python2 Test curl from server only Remove errant ' Run doctests against Python 2 SimpleHTTPServer Special case "import shapefile" in doctests filter Update shapefile.py Always include first example doctest Run Pytest tests in Python 2 non-slim container Update action.yml Update test_shapefile.py Update test_shapefile.py Explicitly export env var Don't need to explicitly set env var. Test without patching localhost. Reformat Update shapefile.py Update shapefile.py Use Caddy instead of SimpleHTTPServer Run caddy in backgorund Run all tests in all containers, all platforms, all Python versions Revert to python -m http.server to avoid overloading Caddy releases page --- .github/actions/test/action.yml | 91 ++++++++++++++++- .../workflows/run_tests_hooks_and_tools.yml | 43 ++++++-- .pre-commit-config.yaml | 14 +-- shapefile.py | 99 ++++++++++++++++--- test_shapefile.py | 23 ++++- 5 files changed, 234 insertions(+), 36 deletions(-) diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml index 10206063..fd4fee7b 100644 --- a/.github/actions/test/action.yml +++ b/.github/actions/test/action.yml @@ -1,38 +1,119 @@ name: - Test + Run Doctests and Pytest description: Run pytest, and run the doctest runner (shapefile.py as a script). inputs: extra_args: - type: string + description: Extra command line args for Pytest and python shapefile.py default: '-m "not network"' + required: false + replace_remote_urls_with_localhost: + description: yes or no. Test loading shapefiles from a url, without overloading an external server from 30 parallel workflows. + default: 'no' + required: false + pyshp_repo_directory: + description: Path to where the PyShp repo was checked out to (to keep separate from Shapefiles & artefacts repo). + required: false + default: '.' + python-version: + description: Set to "2.7" to use caddy instead of python -m SimpleHTTPServer + required: true + + runs: using: "composite" steps: - # The Repo is required to already be checked out, e.g. by the calling workflow + # The PyShp repo is required to already be checked out into pyshp_repo_directory, + # e.g. 
by the calling workflow using: + # steps: + # - uses: actions/checkout@v4 + # with: + # path: ./Pyshp + # and then calling this Action with: + # - name: Run tests + # uses: ./Pyshp/.github/actions/test + # with: + # extra_args: "" + # replace_remote_urls_with_localhost: 'yes' + # pyshp_repo_directory: ./Pyshp # The Python to be tested with is required to already be setup, with "python" and "pip" on the system Path + - name: Checkout shapefiles and zip file artefacts repo + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }} + uses: actions/checkout@v4 + with: + repository: JamesParrott/PyShp_test_shapefile + path: ./PyShp_test_shapefile + + - name: Serve shapefiles and zip file artefacts on localhost + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version != '2.7'}} + shell: bash + working-directory: ./PyShp_test_shapefile + run: | + python -m http.server 8000 & + echo "HTTP_SERVER_PID=$!" >> $GITHUB_ENV + sleep 4 # give server time to start + + - name: Download and unzip Caddy binary + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version == '2.7'}} + working-directory: . + shell: bash + run: | + curl -L https://github.com/caddyserver/caddy/releases/download/v2.10.0/caddy_2.10.0_linux_amd64.tar.gz --output caddy.tar.gz + tar -xzf caddy.tar.gz + + - name: Serve shapefiles and zip file artefacts on localhost using Caddy + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' && inputs.python-version == '2.7'}} + shell: bash + working-directory: . + run: | + ./caddy file-server --root ./PyShp_test_shapefile --listen :8000 & + echo "HTTP_SERVER_PID=$!" >> $GITHUB_ENV + sleep 2 # give server time to start + - name: Doctests shell: bash + working-directory: ${{ inputs.pyshp_repo_directory }} + env: + REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} run: python shapefile.py ${{ inputs.extra_args }} - name: Install test dependencies. shell: bash + working-directory: ${{ inputs.pyshp_repo_directory }} run: | python -m pip install --upgrade pip pip install -r requirements.test.txt - name: Pytest shell: bash + working-directory: ${{ inputs.pyshp_repo_directory }} + env: + REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }} run: | - pytest ${{ inputs.extra_args }} + pytest -rA --tb=short ${{ inputs.extra_args }} - name: Show versions for logs. shell: bash run: | python --version - python -m pytest --version \ No newline at end of file + python -m pytest --version + + + # - name: Test http server + # # (needs a full Github Actions runner or a Python non-slim Docker image, + # # as the slim Debian images don't have curl or wget). 
+ # if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }} + # shell: bash + # run: curl http://localhost:8000/ne_110m_admin_0_tiny_countries.shp + + - name: Stop http server + if: ${{ inputs.replace_remote_urls_with_localhost == 'yes' }} + shell: bash + run: | + echo Killing http server process ID: ${{ env.HTTP_SERVER_PID }} + kill ${{ env.HTTP_SERVER_PID }} \ No newline at end of file diff --git a/.github/workflows/run_tests_hooks_and_tools.yml b/.github/workflows/run_tests_hooks_and_tools.yml index 94995ebe..468b2e2b 100644 --- a/.github/workflows/run_tests_hooks_and_tools.yml +++ b/.github/workflows/run_tests_hooks_and_tools.yml @@ -5,7 +5,7 @@ name: Run pre-commit hooks and tests on: push: pull_request: - branches: [ master ] + branches: [ master, ] workflow_call: workflow_dispatch: @@ -30,7 +30,7 @@ jobs: run: | pylint --disable=R,C test_shapefile.py - test_on_old_Pythons: + test_on_EOL_Pythons: strategy: fail-fast: false matrix: @@ -44,16 +44,28 @@ jobs: runs-on: ubuntu-latest container: - image: python:${{ matrix.python-version }}-slim + image: python:${{ matrix.python-version }} steps: - uses: actions/checkout@v4 + with: + path: ./Pyshp - - name: Run tests - uses: ./.github/actions/test + - name: Non-network tests + uses: ./Pyshp/.github/actions/test + with: + pyshp_repo_directory: ./Pyshp + python-version: ${{ matrix.python-version }} + - name: Network tests + uses: ./Pyshp/.github/actions/test + with: + extra_args: '-m network' + replace_remote_urls_with_localhost: 'yes' + pyshp_repo_directory: ./Pyshp + python-version: ${{ matrix.python-version }} - run_tests: + test_on_supported_Pythons: strategy: fail-fast: false matrix: @@ -74,11 +86,24 @@ jobs: runs-on: ${{ matrix.os }} steps: + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - uses: actions/checkout@v4 + with: + path: ./Pyshp - - uses: actions/setup-python@v5 + - name: Non-network tests + uses: ./Pyshp/.github/actions/test with: + pyshp_repo_directory: ./Pyshp python-version: ${{ matrix.python-version }} - - name: Run tests - uses: ./.github/actions/test \ No newline at end of file + - name: Network tests + uses: ./Pyshp/.github/actions/test + with: + extra_args: '-m network' + replace_remote_urls_with_localhost: 'yes' + pyshp_repo_directory: ./Pyshp + python-version: ${{ matrix.python-version }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f065f594..ffe59bf6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,15 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.4 hooks: - - id: check-yaml - - id: trailing-whitespace + - id: ruff-format - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: - id: isort name: isort (python) -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.4 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 hooks: - - id: ruff-format + - id: check-yaml + - id: trailing-whitespace diff --git a/shapefile.py b/shapefile.py index be3650db..31c2f7fd 100644 --- a/shapefile.py +++ b/shapefile.py @@ -25,6 +25,11 @@ # Module settings VERBOSE = True +# Test config (for the Doctest runner and test_shapefile.py) +REPLACE_REMOTE_URLS_WITH_LOCALHOST = ( + os.getenv("REPLACE_REMOTE_URLS_WITH_LOCALHOST", "").lower() == "yes" +) + # Constants for shape types NULL = 0 POINT = 1 @@ -2794,12 +2799,27 @@ def _get_doctests(): return tests -def _get_no_network_doctests(examples): +def 
_filter_network_doctests(examples, include_network=False, include_non_network=True): globals_from_network_doctests = set() - for example in examples: + + if not (include_network or include_non_network): + return + + examples_it = iter(examples) + + yield next(examples_it) + + for example in examples_it: + # Track variables in doctest shell sessions defined from commands + # that poll remote URLs, to skip subsequent commands until all + # such dependent variables are reassigned. + if 'sf = shapefile.Reader("https://' in example.source: globals_from_network_doctests.add("sf") + if include_network: + yield example continue + lhs = example.source.partition("=")[0] for target in lhs.split(","): @@ -2807,28 +2827,85 @@ def _get_no_network_doctests(examples): if target in globals_from_network_doctests: globals_from_network_doctests.remove(target) + # Non-network tests dependent on the network tests. if globals_from_network_doctests: + if include_network: + yield example + continue + + if not include_non_network: continue yield example -def _test(verbosity=0): +def _replace_remote_url( + old_url, + # Default port of Python http.server and Python 2's SimpleHttpServer + port=8000, + scheme="http", + netloc="localhost", + path=None, + params="", + query="", + fragment="", +): + old_parsed = urlparse(old_url) + + # Strip subpaths, so an artefacts + # repo or file tree can be simpler and flat + if path is None: + path = old_parsed.path.rpartition("/")[2] + + if port not in (None, ""): + netloc = "%s:%s" % (netloc, port) + + new_parsed = old_parsed._replace( + scheme=scheme, + netloc=netloc, + path=path, + params=params, + query=query, + fragment=fragment, + ) + + new_url = urlunparse(new_parsed) if PYTHON3 else urlunparse(list(new_parsed)) + return new_url + + +def _test(args=sys.argv[1:], verbosity=0): if verbosity == 0: print("Getting doctests...") - tests = _get_doctests() - - if len(sys.argv) >= 3 and sys.argv[1:3] == ["-m", "not network"]: - if verbosity == 0: - print("Removing doctests requiring internet access...") - tests.examples = list(_get_no_network_doctests(tests.examples)) import doctest + import re doctest.NORMALIZE_WHITESPACE = 1 - # ignore py2-3 unicode differences - import re + tests = _get_doctests() + + if len(args) >= 2 and args[0] == "-m": + if verbosity == 0: + print("Filtering doctests...") + tests.examples = list( + _filter_network_doctests( + tests.examples, + include_network=args[1] == "network", + include_non_network=args[1] == "not network", + ) + ) + + if REPLACE_REMOTE_URLS_WITH_LOCALHOST: + if verbosity == 0: + print("Replacing remote urls with http://localhost in doctests...") + + for example in tests.examples: + match_url_str_literal = re.search(r'"(https://.*)"', example.source) + if not match_url_str_literal: + continue + old_url = match_url_str_literal.group(1) + new_url = _replace_remote_url(old_url) + example.source = example.source.replace(old_url, new_url) class Py23DocChecker(doctest.OutputChecker): def check_output(self, want, got, optionflags): diff --git a/test_shapefile.py b/test_shapefile.py index 7984e91f..1b7182f9 100644 --- a/test_shapefile.py +++ b/test_shapefile.py @@ -487,16 +487,31 @@ def test_reader_url(): """ Assert that Reader can open shapefiles from a url. """ + + # Allow testing loading of shapefiles from a url on localhost (to avoid + # overloading external servers, and associated spurious test failures). 
+ # A suitable repo of test files, and a localhost server setup is
+ # defined in ./.github/actions/test/action.yml
+ if shapefile.REPLACE_REMOTE_URLS_WITH_LOCALHOST:
+
+ def Reader(url):
+ new_url = shapefile._replace_remote_url(url)
+ print("repr(new_url): %s" % repr(new_url))
+ return shapefile.Reader(new_url)
+ else:
+ print("Using plain Reader")
+ Reader = shapefile.Reader
+
 # test with extension
 url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries.shp?raw=true"
- with shapefile.Reader(url) as sf:
+ with Reader(url) as sf:
 for __recShape in sf.iterShapeRecords():
 pass
 assert sf.shp.closed is sf.shx.closed is sf.dbf.closed is True
 # test without extension
 url = "https://github.com/nvkelso/natural-earth-vector/blob/master/110m_cultural/ne_110m_admin_0_tiny_countries?raw=true"
- with shapefile.Reader(url) as sf:
+ with Reader(url) as sf:
 for __recShape in sf.iterShapeRecords():
 pass
 assert len(sf) > 0
@@ -505,12 +520,12 @@ def test_reader_url():
 # test no files found
 url = "https://raw.githubusercontent.com/nvkelso/natural-earth-vector/master/README.md"
 with pytest.raises(shapefile.ShapefileException):
- with shapefile.Reader(url) as sf:
+ with Reader(url) as sf:
 pass
 # test reading zipfile from url
 url = "https://github.com/JamesParrott/PyShp_test_shapefile/raw/main/gis_osm_natural_a_free_1.zip"
- with shapefile.Reader(url) as sf:
+ with Reader(url) as sf:
 for __recShape in sf.iterShapeRecords():
 pass
 assert len(sf) > 0

From 0045cc88fae9830771338b46ec563e32def69d5c Mon Sep 17 00:00:00 2001
From: James Parrott <80779630+JamesParrott@users.noreply.github.com>
Date: Mon, 21 Jul 2025 01:39:39 +0100
Subject: [PATCH 109/115] Describe strategy for network tests.

---
 .github/actions/test/action.yml | 2 +-
 README.md | 53 ++++++++++++++++++++++++++++-----
 2 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml
index fd4fee7b..c6ca65a4 100644
--- a/.github/actions/test/action.yml
+++ b/.github/actions/test/action.yml
@@ -95,7 +95,7 @@ runs:
 env:
 REPLACE_REMOTE_URLS_WITH_LOCALHOST: ${{ inputs.replace_remote_urls_with_localhost }}
 run: |
- pytest -rA --tb=short ${{ inputs.extra_args }} 
+ pytest -rA --tb=short ${{ inputs.extra_args }}

 - name: Show versions for logs.
 shell: bash
diff --git a/README.md b/README.md
index 2a211cc3..3c743d8e 100644
--- a/README.md
+++ b/README.md
@@ -102,7 +102,9 @@ part of your geospatial project.
 - Equality comparisons between Records now also require the fields to be the same (and in the same order).

 ### Development:
-- Code quality tools run on PyShp
+- Code quality tools (Ruff format) run on PyShp
+- Network, non-network, or all doctests selectable via command line args
+- Network tests made runnable on localhost.

 ## 2.3.1

@@ -1434,24 +1436,61 @@ ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiP

 The testing framework is pytest, and the tests are located in test_shapefile.py.
 This includes an extensive set of unit tests of the various pyshp features,
-and tests against various input data. Some of the tests that require
-internet connectivity will be skipped in offline testing environments.
+and tests against various input data. 

 In the same folder as README.md and shapefile.py, from the command line run
-```
-$ python -m pytest
+
+```shell
+python -m pytest
 ```

 Additionally, all the code and examples located in this file, README.md,
-is tested and verified with the builtin doctest framework. 
A special routine for invoking the doctest is run when calling directly on shapefile.py.
In the same folder as README.md and shapefile.py, from the command line run

-```
-$ python shapefile.py
+
+```shell
+python shapefile.py
 ```

 Linux/Mac and similar platforms may need to run `$ dos2unix README.md` in order
 to correct line endings in README.md, if Git has not automatically changed
 them.

+## Network tests
+
+Some of the tests and doctests are intended to test reading shapefiles from
+remote servers, which requires internet connectivity. The pytest tests are marked "network".
+For rapid iteration, in CI, or when developing in offline testing environments, these
+tests can be dealt with in two ways:
+ i) by skipping the network tests via:
+```shell
+pytest -m "not network"
+```
+or the doctests via:
+```shell
+python shapefile.py -m "not network"
+```
+or ii) by cloning a repo of the files they download, serving these on localhost in a separate process,
+and running the network tests with the environment variable REPLACE_REMOTE_URLS_WITH_LOCALHOST set to `yes`.
+Set up a local file server (*):
+```
+git clone http://github.com/JamesParrott/PyShp_test_shapefile
+cd PyShp_test_shapefile
+python -m http.server 8000
+```
+and then:
+```bash
+REPLACE_REMOTE_URLS_WITH_LOCALHOST=yes pytest
+```
+or the doctests via:
+```bash
+REPLACE_REMOTE_URLS_WITH_LOCALHOST=yes python shapefile.py
+```
+The network tests alone can also be run (without also running all the tests that don't
+make network requests) using: `pytest -m network` (or the doctests using: `python shapefile.py -m network`).
+
+(*) The steps to host the files using Caddy for Python 2 are in ./.github/actions/test/action.yml. For reasons as
+yet unknown, shapefile.py's Reader class in Python 2 Pytest can't connect to a Python 2 SimpleHTTPServer.
+

 # Contributors

From 3faf5756be90103847f2353f88d4abf84ffebf0a Mon Sep 17 00:00:00 2001
From: James Parrott <80779630+JamesParrott@users.noreply.github.com>
Date: Mon, 21 Jul 2025 11:03:14 +0100
Subject: [PATCH 110/115] Remove trailing whitespace

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 3c743d8e..f2fc96b6 100644
--- a/README.md
+++ b/README.md
@@ -103,7 +103,7 @@ part of your geospatial project.

 ### Development:
 - Code quality tools (Ruff format) run on PyShp
-- Network, non-network, or all doctests selectable via command line args 
+- Network, non-network, or all doctests selectable via command line args
 - Network tests made runnable on localhost.

 ## 2.3.1
@@ -1436,7 +1436,7 @@ ESRI White Paper](http://downloads.esri.com/support/whitepapers/ao_/J9749_MultiP
 The testing framework is pytest, and the tests are located in test_shapefile.py.
 This includes an extensive set of unit tests of the various pyshp features,
-and tests against various input data. 
+and tests against various input data.

 In the same folder as README.md and shapefile.py, from the command line run

 ```shell
@@ -1459,7 +1459,7 @@ to correct line endings in README.md, if Git has not automatically changed
 them.

 ## Network tests

 Some of the tests and doctests are intended to test reading shapefiles from
 remote servers, which requires internet connectivity. The pytest tests are marked "network".
-For rapid iteration, in CI, or when developing in offline testing environments, these 
+For rapid iteration, in CI, or when developing in offline testing environments, these
 tests can be dealt with in two ways:
  i) by skipping the network tests via:
 ```shell
 pytest -m "not network"
 ```
@@ -1485,10 +1485,10 @@ or the doctests via:
 ```bash
 REPLACE_REMOTE_URLS_WITH_LOCALHOST=yes python shapefile.py
 ```
-The network tests alone can also be run (without also running all the tests that don't 
+The network tests alone can also be run (without also running all the tests that don't
 make network requests) using: `pytest -m network` (or the doctests using: `python shapefile.py -m network`).

-(*) The steps to host the files using Caddy for Python 2 are in ./.github/actions/test/action.yml. For reasons as 
+(*) The steps to host the files using Caddy for Python 2 are in ./.github/actions/test/action.yml. For reasons as
 yet unknown, shapefile.py's Reader class in Python 2 Pytest can't connect to a Python 2 SimpleHTTPServer.


From 0b153232c0c9dff64b0259c21e003e074df4d255 Mon Sep 17 00:00:00 2001
From: James Parrott <80779630+JamesParrott@users.noreply.github.com>
Date: Mon, 21 Jul 2025 11:20:57 +0100
Subject: [PATCH 111/115] Update .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 5b39efe1..d1734202 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,5 +21,6 @@ dist/
 *.py[cod]
 .vscode
 .dmypy.json
+.python-version
 .venv
 venv

From d5d3c3b040ef33e6a22d7d7d2c7c0693309a5ff2 Mon Sep 17 00:00:00 2001
From: James Parrott <80779630+JamesParrott@users.noreply.github.com>
Date: Mon, 21 Jul 2025 11:41:59 +0100
Subject: [PATCH 112/115] v2.4.0

---
 README.md | 2 +-
 shapefile.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index f2fc96b6..ccb3284c 100644
--- a/README.md
+++ b/README.md
@@ -95,7 +95,7 @@ part of your geospatial project.

 # Version Changes

-## 2.3.x
+## 2.4.0

 ### New Features:
diff --git a/shapefile.py b/shapefile.py
index 31c2f7fd..211fd48f 100644
--- a/shapefile.py
+++ b/shapefile.py
@@ -6,7 +6,7 @@
 Compatible with Python versions 2.7-3.x
 """

-__version__ = "2.3.1"
+__version__ = "2.4.0"

 import array
 import io

From 0b45d1257d1c5c522669d9e01e4cc7aa7cebb56e Mon Sep 17 00:00:00 2001
From: James Parrott <80779630+JamesParrott@users.noreply.github.com>
Date: Mon, 21 Jul 2025 12:06:06 +0100
Subject: [PATCH 113/115] Announce dropping support for Python 2 & 3.8 in
 PyShp 3.0.0

---
 README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/README.md b/README.md
index ccb3284c..c55e2043 100644
--- a/README.md
+++ b/README.md
@@ -97,6 +97,17 @@ part of your geospatial project.

 ## 2.4.0

+### Breaking Change. Support for Python 2 and Pythons <= 3.8 to be dropped.
+- PyShp 2.4.0 is the latest (and likely last) version of PyShp to support Python 2.7 and Pythons <= 3.8.
+These CPython versions have reached [end of life](https://devguide.python.org/versions/#versions).
+- Future development will focus on PyShp v3.0.0 onwards (currently intended to support Pythons >= 3.9).
+- This will not break any projects, as pip and other package managers should not install PyShp 3.0.0
+(after its release) in unsupported Pythons. But we no longer promise such projects will get PyShp's latest
+bug fixes and features. 
- If this negatively impacts your project, all feedback about this decision is welcome
+on [the discussion page](https://github.com/GeospatialPython/pyshp/discussions/290).
+
+
 ### New Features:
 - Reader.iterRecords now allows start and stop to be specified, to lookup smaller ranges of records.
 - Equality comparisons between Records now also require the fields to be the same (and in the same order).

From 4286aaa0fc55ae93effc99357ac9254f1a3013f7 Mon Sep 17 00:00:00 2001
From: James Parrott <80779630+JamesParrott@users.noreply.github.com>
Date: Mon, 21 Jul 2025 12:36:02 +0100
Subject: [PATCH 114/115] Correct name of called workflow

---
 .github/workflows/deploy.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 9a1fa30a..104c28e9 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -29,7 +29,7 @@ jobs:
 python-version: '3.x'

 - name: Run tests and hooks
- uses: ./.github/workflows/run_tests_and_hooks.yml
+ uses: ./.github/workflows/run_tests_hooks_and_tools.yml

 deploy:
 # Prevent deployment of releases that fail any hooks (e.g. linting) or that fail any tests.

From 4a7bd1dd911d9b97437ea77fc1631946f7e53b0f Mon Sep 17 00:00:00 2001
From: James Parrott <80779630+JamesParrott@users.noreply.github.com>
Date: Mon, 21 Jul 2025 13:10:15 +0100
Subject: [PATCH 115/115] Correct syntax to call reusable workflow (uses: goes
 in job: job_name: not steps:)

---
 .github/workflows/deploy.yml | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 104c28e9..c66adc89 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -14,22 +14,11 @@ on:

 jobs:
 test:
-
 # In general, tests should be run after building a distribution, to test that distribution.
 # However as long as PyShp is a pure Python library, with pure Python deps (or no deps)
 # then this would only test the packaging process, not so much the code as there are
 # no binaries.
- runs-on: ubuntu-latest
-
- steps:
- - uses: actions/checkout@v4
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: '3.x'
-
- - name: Run tests and hooks
- uses: ./.github/workflows/run_tests_hooks_and_tools.yml
+ uses: ./.github/workflows/run_tests_hooks_and_tools.yml
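
For reference, the URL rewriting that REPLACE_REMOTE_URLS_WITH_LOCALHOST
switches on (shapefile._replace_remote_url, added in [PATCH 108/115]) can be
exercised directly. A minimal sketch; the printed URL is what the function's
defaults produce:

    from shapefile import _replace_remote_url

    url = ("https://github.com/JamesParrott/PyShp_test_shapefile"
           "/raw/main/gis_osm_natural_a_free_1.zip")
    # Subpaths are stripped, so a flat artefacts repo served on
    # localhost:8000 can supply the file.
    print(_replace_remote_url(url))
    # http://localhost:8000/gis_osm_natural_a_free_1.zip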