Skip to content

Commit fffa2d5

Browse files
[Fixes GeoNode#12412] Download whole asset (GeoNode#12425)
* [Fixes GeoNode#12412] Download whole asset
* [Fixes GeoNode#12412] Refactor using zipstream as library
* [Fixes GeoNode#12412] Add test coverage for assets download
* [Fixes GeoNode#12412] Remove duplicate zipstreaming library with newest one
* Recurse asset folder to avoid ZipStream creating a parent path

---------

Co-authored-by: G. Allegri <giohappy@gmail.com>
1 parent 463022c commit fffa2d5

File tree

8 files changed

+83
-74
lines changed

8 files changed

+83
-74
lines changed

.github/workflows/flake8.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,4 @@ jobs:
3737
flake8 geonode --count --statistics
3838
3939
- name: "Check: black"
40-
run: black --check geonode
40+
run: black -t py310 --check geonode

geonode/assets/local.py

+23-9
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@
44
import shutil
55

66
from django.conf import settings
7-
from django.http import HttpResponse
7+
from django.http import HttpResponse, StreamingHttpResponse
88
from django.urls import reverse
99
from django_downloadview import DownloadResponse
10+
from zipstream import ZipStream, walk
1011

1112
from geonode.assets.handlers import asset_handler_registry, AssetHandlerInterface, AssetDownloadHandlerInterface
1213
from geonode.assets.models import LocalAsset
@@ -241,14 +242,27 @@ def create_response(
241242
filename = os.path.basename(localfile)
242243
orig_base, ext = os.path.splitext(filename)
243244
outname = f"{basename or orig_base or 'file'}{ext}"
244-
245-
logger.info(f"Returning file '{localfile}' with name '{outname}'")
246-
247-
return DownloadResponse(
248-
_asset_storage_manager.open(localfile).file,
249-
basename=f"{outname}",
250-
attachment=attachment,
251-
)
245+
match attachment:
246+
case True:
247+
logger.info(f"Zipping file '{localfile}' with name '{orig_base}'")
248+
zs = ZipStream(sized=True)
249+
for filepath in walk(LocalAssetHandler._get_managed_dir(asset)):
250+
zs.add_path(filepath, os.path.basename(filepath))
251+
# closing zip for all contents to be written
252+
return StreamingHttpResponse(
253+
zs,
254+
content_type="application/zip",
255+
headers={
256+
"Content-Disposition": f"attachment; filename={orig_base}.zip",
257+
"Content-Length": len(zs),
258+
"Last-Modified": zs.last_modified,
259+
},
260+
)
261+
case False:
262+
logger.info(f"Returning file '{localfile}' with name '{outname}'")
263+
return DownloadResponse(
264+
_asset_storage_manager.open(localfile).file, basename=f"{outname}", attachment=False
265+
)
252266
else:
253267
logger.warning(f"Internal file {localfile} not found for asset {asset.id}")
254268
return HttpResponse(f"Internal file not found for asset {asset.id}", status=404 if path else 500)

geonode/assets/tests.py

+33-14
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
from django.conf import settings
2727
from django.contrib.auth import get_user_model
28+
from django.http import StreamingHttpResponse
2829
from django.urls import reverse
2930

3031
from rest_framework.test import APITestCase
@@ -225,13 +226,43 @@ def test_download_file(self):
225226
u, _ = get_user_model().objects.get_or_create(username="admin")
226227
self.assertTrue(self.client.login(username="admin", password="admin"), "Login failed")
227228

229+
asset = self._setup_test(u)
230+
231+
for path, key in ((None, "one"), ("one.json", "one"), ("two.json", "two"), ("subdir/three.json", "three")):
232+
# args = [asset.pk, path] if path else [asset.pk]
233+
args = {"pk": asset.pk, "path": path} if path else {"pk": asset.pk}
234+
logger.info(f"*** Testing path '{path}' args {args}")
235+
url = reverse("assets-link", kwargs=args)
236+
logger.info(f"REVERSE url is {url}")
237+
response = self.client.get(url)
238+
content = self._get_streaming_content(response)
239+
rjson = json.loads(content)
240+
self.assertEqual(response.status_code, 200)
241+
self.assertIn(key, rjson, f"Key '{key}' not found in path '{path}': {rjson} URL {url}")
242+
logger.info(f"Test for path '{path}' OK")
243+
244+
def test_download_with_attachment(self):
245+
u, _ = get_user_model().objects.get_or_create(username="admin")
246+
self.assertTrue(self.client.login(username="admin", password="admin"), "Login failed")
247+
248+
for key, el in (("one", ONE_JSON), ("two", TWO_JSON), ("three", THREE_JSON)):
249+
asset = self._setup_test(u, _file=el)
250+
251+
url = reverse("assets-download", kwargs={"pk": asset.pk})
252+
logger.info(f"REVERSE url is {url}")
253+
response = self.client.get(url)
254+
self.assertEqual(response.status_code, 200)
255+
self.assertTrue(isinstance(response, StreamingHttpResponse))
256+
self.assertEqual(response.get("Content-Disposition"), f"attachment; filename={key}.zip")
257+
258+
def _setup_test(self, u, _file=ONE_JSON):
228259
asset_handler = asset_handler_registry.get_default_handler()
229260
asset = asset_handler.create(
230261
title="Test Asset",
231262
description="Description of test asset",
232263
type="NeverMind",
233264
owner=u,
234-
files=[ONE_JSON],
265+
files=[_file],
235266
clone_files=True,
236267
)
237268
asset.save()
@@ -245,16 +276,4 @@ def test_download_file(self):
245276
os.mkdir(sub_dir)
246277
shutil.copy(TWO_JSON, asset_dir)
247278
shutil.copy(THREE_JSON, sub_dir)
248-
249-
for path, key in ((None, "one"), ("one.json", "one"), ("two.json", "two"), ("subdir/three.json", "three")):
250-
# args = [asset.pk, path] if path else [asset.pk]
251-
args = {"pk": asset.pk, "path": path} if path else {"pk": asset.pk}
252-
logger.info(f"*** Testing path '{path}' args {args}")
253-
url = reverse("assets-link", kwargs=args)
254-
logger.info(f"REVERSE url is {url}")
255-
response = self.client.get(url)
256-
content = self._get_streaming_content(response)
257-
rjson = json.loads(content)
258-
self.assertEqual(response.status_code, 200)
259-
self.assertIn(key, rjson, f"Key '{key}' not found in path '{path}': {rjson} URL {url}")
260-
logger.info(f"Test for path '{path}' OK")
279+
return asset

geonode/proxy/tests.py

+7-15
Original file line numberDiff line numberDiff line change
@@ -310,11 +310,9 @@ def test_download_url_with_existing_files(self, fopen, fexists):
310310
dataset = Dataset.objects.all().first()
311311

312312
dataset_files = [
313-
"/tmpe1exb9e9/foo_file.dbf",
314-
"/tmpe1exb9e9/foo_file.prj",
315-
"/tmpe1exb9e9/foo_file.shp",
316-
"/tmpe1exb9e9/foo_file.shx",
313+
f"{settings.PROJECT_ROOT}/assets/tests/data/one.json",
317314
]
315+
318316
asset, link = create_asset_and_link(
319317
dataset, get_user_model().objects.get(username="admin"), dataset_files, clone_files=False
320318
)
@@ -333,7 +331,7 @@ def test_download_url_with_existing_files(self, fopen, fexists):
333331
# Expected 200 with a zip attachment, since the files are available for this layer
334332
self.assertEqual(response.status_code, 200)
335333
self.assertEqual("application/zip", response.headers.get("Content-Type"))
336-
self.assertEqual('attachment; filename="CA.zip"', response.headers.get("Content-Disposition"))
334+
self.assertEqual("attachment; filename=CA.zip", response.headers.get("Content-Disposition"))
337335

338336
link.delete()
339337
asset.delete()
@@ -347,11 +345,9 @@ def test_download_files(self, fopen, fexists):
347345
dataset = Dataset.objects.all().first()
348346

349347
dataset_files = [
350-
"/tmpe1exb9e9/foo_file.dbf",
351-
"/tmpe1exb9e9/foo_file.prj",
352-
"/tmpe1exb9e9/foo_file.shp",
353-
"/tmpe1exb9e9/foo_file.shx",
348+
f"{settings.PROJECT_ROOT}/assets/tests/data/one.json",
354349
]
350+
355351
asset, link = create_asset_and_link(
356352
dataset, get_user_model().objects.get(username="admin"), dataset_files, clone_files=False
357353
)
@@ -367,16 +363,12 @@ def test_download_files(self, fopen, fexists):
367363
# headers and status assertions
368364
self.assertEqual(response.status_code, 200)
369365
self.assertEqual(response.get("content-type"), "application/zip")
370-
self.assertEqual(response.get("content-disposition"), f'attachment; filename="{dataset.name}.zip"')
366+
self.assertEqual(response.get("content-disposition"), f"attachment; filename={dataset.name}.zip")
371367
# Inspect content
372368
zip_content = io.BytesIO(b"".join(response.streaming_content))
373369
zip = zipfile.ZipFile(zip_content)
374370
zip_files = zip.namelist()
375-
self.assertEqual(len(zip_files), 4)
376-
self.assertIn(".shp", "".join(zip_files))
377-
self.assertIn(".dbf", "".join(zip_files))
378-
self.assertIn(".shx", "".join(zip_files))
379-
self.assertIn(".prj", "".join(zip_files))
371+
self.assertIn(".json", "".join(zip_files))
380372

381373
link.delete()
382374
asset.delete()

geonode/proxy/views.py

+13-30
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import gzip
2323
import logging
2424
import traceback
25-
import zipstream
2625

2726
from hyperlink import URL
2827
from urllib.parse import urlparse, urlsplit, urljoin
@@ -56,7 +55,7 @@
5655
from geonode.base.auth import get_auth_user, get_token_from_auth_header
5756
from geonode.geoserver.helpers import ogc_server_settings
5857
from geonode.assets.utils import get_default_asset
59-
58+
from zipstream import ZipStream
6059
from .utils import proxy_urls_registry
6160

6261
logger = logging.getLogger(__name__)
@@ -244,21 +243,13 @@ def download(request, resourceid, sender=Dataset):
244243

245244
if isinstance(instance, ResourceBase):
246245
dataset_files = []
247-
file_list = [] # Store file info to be returned
248246
try:
249247
asset_obj = get_default_asset(instance)
250248
# Copy all Dataset related files into a temporary folder
251249
files = asset_obj.location if asset_obj else []
252250
for file_path in files:
253251
if storage_manager.exists(file_path):
254252
dataset_files.append(file_path)
255-
filename = os.path.basename(file_path)
256-
file_list.append(
257-
{
258-
"name": filename,
259-
"data_iter": storage_manager.open(file_path),
260-
}
261-
)
262253
else:
263254
return HttpResponse(
264255
loader.render_to_string(
@@ -282,27 +273,19 @@ def download(request, resourceid, sender=Dataset):
282273

283274
# ZIP everything and return
284275
target_file_name = "".join([instance.name, ".zip"])
285-
286-
target_zip = zipstream.ZipFile(mode="w", compression=zipstream.ZIP_DEFLATED, allowZip64=True)
287-
288-
# Iterable: Needed when the file_info has its data as a stream
289-
def _iterable(source_iter):
290-
while True:
291-
buf = source_iter.read(BUFFER_CHUNK_SIZE)
292-
if not buf:
293-
break
294-
yield buf
295-
296-
# Add files to zip
297-
for file_info in file_list:
298-
target_zip.write_iter(arcname=file_info["name"], iterable=_iterable(file_info["data_iter"]))
299-
300276
register_event(request, "download", instance)
301-
302-
# Streaming content response
303-
response = StreamingHttpResponse(target_zip, content_type="application/zip")
304-
response["Content-Disposition"] = f'attachment; filename="{target_file_name}"'
305-
return response
277+
folder = os.path.dirname(dataset_files[0])
278+
279+
zs = ZipStream.from_path(folder)
280+
return StreamingHttpResponse(
281+
zs,
282+
content_type="application/zip",
283+
headers={
284+
"Content-Disposition": f"attachment; filename={target_file_name}",
285+
"Content-Length": len(zs),
286+
"Last-Modified": zs.last_modified,
287+
},
288+
)
306289
except (NotImplementedError, Upload.DoesNotExist):
307290
traceback.print_exc()
308291
tb = traceback.format_exc()

geonode/storage/data_retriever.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,10 @@ def file_chunks_iterable(file, chunk_size=None):
112112
for chunk in self._django_form_file.chunks():
113113
tmp_file.write(chunk)
114114
else:
115-
with open(self.file_path, "wb") as tmp_file, smart_open.open(
116-
uri=self._original_file_uri, mode="rb"
117-
) as original_file:
115+
with (
116+
open(self.file_path, "wb") as tmp_file,
117+
smart_open.open(uri=self._original_file_uri, mode="rb") as original_file,
118+
):
118119
for chunk in file_chunks_iterable(original_file):
119120
tmp_file.write(chunk)
120121

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,12 @@ tqdm==4.66.4
2121
Deprecated==1.2.14
2222
wrapt==1.16.0
2323
jsonschema==4.22.0
24-
zipstream-new==1.1.8
2524
schema==0.7.7
2625
rdflib==6.3.2
2726
smart_open==7.0.4
2827
PyMuPDF==1.24.3
2928
defusedxml==0.7.1
29+
zipstream-ng==1.7.1
3030

3131
# Django Apps
3232
django-allauth==0.63.6

setup.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ install_requires =
4747
Deprecated==1.2.14
4848
wrapt==1.16.0
4949
jsonschema==4.22.0
50-
zipstream-new==1.1.8
50+
zipstream-ng==1.7.1
5151
schema==0.7.7
5252
rdflib==6.3.2
5353
smart_open==7.0.4

0 commit comments

Comments
 (0)