Skip to content

Commit faf7f65

Browse files
committed
fix: continue download on 404
1 parent 9806d04 commit faf7f65

File tree

2 files changed: 14 additions and 7 deletions.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ databusclient download https://databus.dbpedia.org/dbpedia/mappings
122122

123123
If no `--localdir` is provided, the current working directory is used as base directory. The downloaded files will be stored in the working directory in a folder structure according to the databus structure, i.e. `./$ACCOUNT/$GROUP/$ARTIFACT/$VERSION/`.
124124

125-
**Collcetion**: download of all files within a collection
125+
**Collection**: download of all files within a collection
126126
```
127127
databusclient download https://databus.dbpedia.org/dbpedia/collections/dbpedia-snapshot-2022-12
128128
```

databusclient/client.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,14 @@ def __download_file__(url, filename, vault_token_file=None, auth_url=None, clien
437437
# --- 4. Retry with token ---
438438
response = requests.get(url, headers=headers, stream=True)
439439

440-
response.raise_for_status() # Raise if still failing
440+
try:
441+
response.raise_for_status() # Raise if still failing
442+
except requests.exceptions.HTTPError as e:
443+
if response.status_code == 404:
444+
print(f"WARNING: Skipping file {url} because it was not found (404).")
445+
return
446+
else:
447+
raise e
441448

442449
total_size_in_bytes = int(response.headers.get('content-length', 0))
443450
block_size = 1024 # 1 KiB
@@ -559,7 +566,7 @@ def __get_databus_latest_version_of_artifact__(json_str: str) -> str:
559566
"""
560567
json_dict = json.loads(json_str)
561568
versions = json_dict.get("databus:hasVersion")
562-
569+
563570
# Single version case {}
564571
if isinstance(versions, dict):
565572
versions = [versions]
@@ -581,7 +588,7 @@ def __get_databus_artifacts_of_group__(json_str: str) -> List[str]:
581588
"""
582589
json_dict = json.loads(json_str)
583590
artifacts = json_dict.get("databus:hasArtifact", [])
584-
591+
585592
result = []
586593
for item in artifacts:
587594
uri = item.get("@id")
@@ -661,7 +668,7 @@ def download(
661668
if endpoint is None:
662669
endpoint = f"https://{host}/sparql"
663670
print(f"SPARQL endpoint {endpoint}")
664-
671+
665672
# databus collection
666673
if "/collections/" in databusURI: # TODO "in" is not safe! there could be an artifact named collections, need to check for the correct part position in the URI
667674
query = __handle_databus_collection__(databusURI)
@@ -683,7 +690,7 @@ def download(
683690
json_str = __get_json_ld_from_databus__(latest)
684691
res = __handle_databus_artifact_version__(json_str)
685692
__download_list__(res, localDir, vault_token_file=token, auth_url=auth_url, client_id=client_id)
686-
693+
687694
# databus group
688695
elif group is not None:
689696
json_str = __get_json_ld_from_databus__(databusURI)
@@ -708,7 +715,7 @@ def download(
708715
# query as argument
709716
else:
710717
print("QUERY {}", databusURI.replace("\n", " "))
711-
if endpoint is None: # endpoint is required for queries (--databus)
718+
if endpoint is None: # endpoint is required for queries (--databus)
712719
raise ValueError("No endpoint given for query")
713720
res = __handle_databus_file_query__(endpoint, databusURI)
714721
__download_list__(res, localDir, vault_token_file=token, auth_url=auth_url, client_id=client_id)

0 commit comments

Comments
 (0)