Skip to content

Commit 6d0b3a3

Browse files
wybertkoettert
authored and committed
handle the missing value and any bad address in each row
1 parent 42caf56 commit 6d0b3a3

File tree

1 file changed

+38
-23
lines changed

1 file changed

+38
-23
lines changed

knime_extension/src/nodes/conversion.py

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,8 @@ def execute(self, exec_context, input_table):
133133
)
134134
@knut.geo_node_description(
135135
short_description="Converts the input Well-known-text (WKT) column to a geometry column.",
136-
description="""This node converts the selected
137-
[Well-known-text (WKT)](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) input column to
136+
description="""This node converts the selected
137+
[Well-known-text (WKT)](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) input column to
138138
a geometry column in the units of the provided CRS.
139139
""",
140140
references={
@@ -209,7 +209,7 @@ def __init__(self):
209209
)
210210
@knut.geo_node_description(
211211
short_description="Converts the input GeoJSON column to a geometry column.",
212-
description="""This node converts the selected [GeoJSON](https://en.wikipedia.org/wiki/GeoJSON) input column to
212+
description="""This node converts the selected [GeoJSON](https://en.wikipedia.org/wiki/GeoJSON) input column to
213213
a geometry column in the units of the provided CRS.
214214
""",
215215
references={
@@ -480,7 +480,7 @@ def __init__(self):
480480
)
481481
@knut.geo_node_description(
482482
short_description="Converts the input geometry column to a Well-known-binary (WKB) column.",
483-
description="""This node converts the selected geometry column into a
483+
description="""This node converts the selected geometry column into a
484484
[GeoJSON](https://en.wikipedia.org/wiki/GeoJSON) column.
485485
""",
486486
references={
@@ -647,7 +647,7 @@ class GeocodingServiceSettings:
647647

648648
api_key = knext.StringParameter(
649649
"API key",
650-
"""Enter the API key for the service provider.
650+
"""Enter the API key for the service provider.
651651
You can leave this field empty if the service provider (such as `nominatim` and `arcgis`) doesn't require an API key.""",
652652
default_value="",
653653
)
@@ -693,9 +693,9 @@ class GeocodingServiceSettings:
693693
The node uses the [Nominatim](https://nominatim.org/) service by default.
694694
You can change the service provider and API key in the node settings.
695695
See the [geopy documentation](https://geopy.readthedocs.io/en/stable/#module-geopy.geocoders) for more information.
696-
Notice that the service provider and API key are only required for some service providers.
697-
For example, you do not have to enter them forNomintim or ArcGIS.
698-
The addresses can be like `1600 Amphitheatre Parkway, Mountain View, CA`
696+
Notice that the service provider and API key are only required for some service providers.
697+
For example, you do not have to enter them for Nominatim or ArcGIS.
698+
The addresses can be like `1600 Amphitheatre Parkway, Mountain View, CA`
699699
or `1600 Amphitheatre Parkway, Mountain View, CA, United States`.
700700
""",
701701
references={
@@ -759,21 +759,36 @@ def execute(self, exec_context: knext.ExecutionContext, input_table):
759759
min_delay_seconds=self.geocoding_service_settings.min_delay_seconds,
760760
)
761761

762-
tmp_col = knut.get_unique_column_name("__location__", input_table.schema)
763-
tmp_lat = knut.get_unique_column_name("__latitude__", input_table.schema)
764-
tmp_long = knut.get_unique_column_name("__longitude__", input_table.schema)
765-
df[tmp_col] = df[self.address_col].apply(lambda x: geocode(x))
766-
df[tmp_lat] = df[tmp_col].apply(lambda x: x.latitude)
767-
df[tmp_long] = df[tmp_col].apply(lambda x: x.longitude)
768-
762+
process_counter = 1
763+
n_loop = len(df)
769764
result_col_name = knut.get_unique_column_name(self.name, input_table.schema)
765+
df[result_col_name] = None
766+
for index, row in df.iterrows():
767+
try:
768+
result = geocode(row[self.address_col])
769+
if result is None:
770+
knut.LOGGER.warning(
771+
f"Got none result at index {index}, address: {row[self.address_col]}"
772+
)
773+
else:
774+
from shapely.geometry import Point
770775

771-
df[result_col_name] = gp.points_from_xy(df[tmp_long], df[tmp_lat])
772-
773-
gdf = gp.GeoDataFrame(df, geometry=result_col_name, crs=kproj.DEFAULT_CRS)
776+
df.at[index, result_col_name] = Point(
777+
result.longitude, result.latitude
778+
)
779+
except Exception as e:
780+
knut.LOGGER.warning(
781+
f"Error at index {index}, address: {row[self.address_col]}, error: {e}"
782+
)
774783

775-
gdf.drop(columns=[tmp_col, tmp_lat, tmp_long], inplace=True)
784+
exec_context.set_progress(
785+
0.9 * process_counter / n_loop,
786+
"Batch %d of %d processed" % (process_counter, n_loop),
787+
)
788+
knut.check_canceled(exec_context)
789+
process_counter += 1
776790

791+
gdf = gp.GeoDataFrame(df, geometry=result_col_name, crs=kproj.DEFAULT_CRS)
777792
return knut.to_table(gdf)
778793

779794

@@ -805,7 +820,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table):
805820
The node uses the [Nominatim](https://nominatim.org/) service by default.
806821
You can change the service provider and API key in the node settings.
807822
See the [geopy documentation](https://geopy.readthedocs.io/en/stable/#module-geopy.geocoders) for more information.
808-
Notice that the service provider and API key are only required for some service providers.
823+
Notice that the service provider and API key are only required for some service providers.
809824
For example, you do not have to enter them for Nominatim or ArcGIS.
810825
""",
811826
references={
@@ -829,7 +844,7 @@ class GeoReverseGeocodingNode:
829844

830845
append_raw_json = knext.BoolParameter(
831846
"Append raw json",
832-
"""If selected, the provider dependent raw json string of the result will be appended to a new column.
847+
"""If selected, the provider dependent raw json string of the result will be appended to a new column.
833848
It is useful for extracting specific information such as the city.""",
834849
default_value=False,
835850
)
@@ -1246,13 +1261,13 @@ class MetadataNode:
12461261
)
12471262
extract_type = knext.BoolParameter(
12481263
label="Extract type",
1249-
description="""Extract the [geometry type](https://shapely.readthedocs.io/en/stable/manual.html#object.geom_type)
1264+
description="""Extract the [geometry type](https://shapely.readthedocs.io/en/stable/manual.html#object.geom_type)
12501265
into a column""",
12511266
default_value=False,
12521267
)
12531268
extract_z = knext.BoolParameter(
12541269
label="Extract z coordinate flag",
1255-
description="""Extract a flag that indicates if the geometry has a
1270+
description="""Extract a flag that indicates if the geometry has a
12561271
[z coordinate]([CRS](https://en.wikipedia.org/wiki/Spatial_reference_system))""",
12571272
default_value=False,
12581273
)

0 commit comments

Comments
 (0)