Skip to content

Commit 91bf6ba

Browse files
committed
Add improved pyld integration as parser
1 parent fda61ba commit 91bf6ba

File tree

4 files changed

+190
-37
lines changed

4 files changed

+190
-37
lines changed

rdflib/plugins/parsers/pyld.py

Lines changed: 0 additions & 37 deletions
This file was deleted.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .parser import JSONLDParser
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
from io import StringIO, BufferedReader
2+
import json
3+
from typing import Union
4+
5+
import pyld
6+
from pyld import jsonld
7+
from pyld.jsonld import RDF_TYPE, _is_keyword, _is_absolute_iri
8+
9+
from rdflib import BNode, Graph, Literal, URIRef
10+
from rdflib.graph import DATASET_DEFAULT_GRAPH_ID
11+
from rdflib.parser import Parser, InputSource, BytesIOWrapper, PythonInputSource
12+
13+
from .to_rdf import to_rdf
14+
15+
16+
# Monkey patch pyld.
17+
# Install the package-local ``to_rdf`` (imported from ``.to_rdf`` above) as
# ``JsonLdProcessor.to_rdf``. This assignment runs at import time and replaces
# the attribute on the pyld class itself, not on a single instance.
pyld.jsonld.JsonLdProcessor.to_rdf = to_rdf
18+
19+
20+
class JSONLDParser(Parser):
    """rdflib parser plugin that reads JSON-LD through pyld.

    pyld performs expansion and node-map generation; this class converts the
    resulting pyld terms to rdflib terms and adds them to the sink's store.
    """

    @staticmethod
    def _term_to_rdflib(term: dict):
        """Convert one pyld RDF term dictionary into an rdflib term.

        :param term: dictionary with a ``"type"`` key (``"IRI"``,
            ``"blank node"`` or anything else for a literal), a ``"value"``
            key and, for literals, ``"datatype"`` and optionally ``"language"``.

        :return: a ``URIRef``, ``BNode`` or ``Literal``.
        """
        kind = term["type"]
        if kind == "IRI":
            return URIRef(term["value"])
        if kind == "blank node":
            # strip the leading "_:" from the blank node label
            return BNode(term["value"][2:])

        language = term.get("language")
        if language:
            # rdflib rejects a Literal carrying both lang and datatype, so a
            # language-tagged string is built from its language tag alone.
            return Literal(term["value"], lang=language)
        return Literal(term["value"], datatype=URIRef(term["datatype"]))

    def parse(self, source: InputSource, sink: Graph) -> None:
        """Parse JSON-LD from ``source`` and add the quads to ``sink.store``.

        :param source: a ``PythonInputSource`` (its ``data`` attribute is used
            as the already-decoded JSON-LD document) or any other input source
            whose byte stream contains a JSON document.
        :param sink: the graph whose ``store`` receives the parsed quads.

        :raises TypeError: if the stream returned by ``source.getByteStream()``
            is not a ``StringIO``, ``BytesIOWrapper`` or ``BufferedReader``.
        """
        # TODO: Do we need to set up a document loader?
        # See https://github.com/digitalbazaar/pyld#document-loader
        # Using a document loader requires either Requests or aiohttp

        # Inside the closure below ``self`` is the pyld processor, so the
        # converter is bound to a local name here.
        to_term = self._term_to_rdflib

        def _graph_to_rdf(
            self: pyld.jsonld.JsonLdProcessor,
            pyld_graph_name: str,
            pyld_graph_dict: dict,
            issuer: pyld.jsonld.IdentifierIssuer,
            options: dict,
        ):
            """
            Adds the RDF triples of the given graph to ``sink.store``.

            :param pyld_graph_name: the graph name of the triples.
            :param pyld_graph_dict: the graph to create RDF triples for.
            :param issuer: the IdentifierIssuer for issuing blank node identifiers.
            :param options: the RDF serialization options.

            :return: None; triples are written to the store as a side effect.
            """
            # The graph name does not depend on the node being visited, so it
            # is converted once per graph.
            if pyld_graph_name == "@default":
                graph_name = DATASET_DEFAULT_GRAPH_ID
            elif pyld_graph_name.startswith("_:"):
                graph_name = BNode(pyld_graph_name[2:])
            else:
                graph_name = URIRef(pyld_graph_name)

            for id_, node in sorted(pyld_graph_dict.items()):
                # skip relative IRI subjects
                if not _is_absolute_iri(id_):
                    continue

                # RDF subject
                if id_.startswith("_:"):
                    subject = BNode(id_[2:])
                else:
                    subject = URIRef(id_)

                for property, items in sorted(node.items()):
                    if property == "@type":
                        property = RDF_TYPE
                    elif _is_keyword(property):
                        continue

                    # skip relative IRI predicates
                    if not _is_absolute_iri(property):
                        continue

                    # RDF predicate
                    if property.startswith("_:"):
                        # skip bnode predicates unless producing
                        # generalized RDF
                        if not options["produceGeneralizedRdf"]:
                            continue
                        predicate = BNode(property[2:])
                    else:
                        predicate = URIRef(property)

                    for item in items:
                        # pyld appends the rdf:first / rdf:rest triples of an
                        # @list object to the list passed in here (see pyld's
                        # ``_list_to_rdf``); they must be stored as well or
                        # the list structure is lost.
                        list_triples = []

                        # convert list, value or node object to triple
                        object = self._object_to_rdf(
                            item, issuer, list_triples, options.get("rdfDirection")
                        )

                        for triple in list_triples:
                            if triple["object"] is None:
                                # relative IRI inside the list: no triple
                                continue
                            sink.store.add(
                                (
                                    to_term(triple["subject"]),
                                    to_term(triple["predicate"]),
                                    to_term(triple["object"]),
                                ),
                                graph_name,
                            )

                        # skip None objects (they are relative IRIs)
                        if object is not None:
                            sink.store.add(
                                (
                                    subject,
                                    predicate,
                                    to_term(object),
                                ),
                                graph_name,
                            )

        # Monkey patch pyld.
        # NOTE(review): this replaces the method on the pyld class itself and
        # the closure keeps a reference to ``sink``; overlapping parses into
        # different sinks would presumably interfere with each other.
        pyld.jsonld.JsonLdProcessor._graph_to_rdf = _graph_to_rdf

        if isinstance(source, PythonInputSource):
            data = source.data
            jsonld.to_rdf(data)
        else:
            stream: Union[
                StringIO, BytesIOWrapper, BufferedReader
            ] = source.getByteStream()

            if not isinstance(stream, (StringIO, BytesIOWrapper, BufferedReader)):
                raise TypeError(f"Unhandled type for 'stream' as {type(stream)}.")

            try:
                # Decoding happens inside the try block so the stream is
                # closed even when the payload is not valid JSON.
                data = json.loads(stream.read())
                jsonld.to_rdf(data)
            finally:
                stream.close()
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import pyld
2+
from pyld.jsonld import (
3+
_is_string,
4+
_default_document_loader,
5+
ContextResolver,
6+
_resolved_context_cache,
7+
JsonLdError,
8+
IdentifierIssuer,
9+
_is_absolute_iri,
10+
)
11+
12+
13+
def to_rdf(self: pyld.jsonld.JsonLdProcessor, input_: dict, options: dict) -> None:
    """
    Expands a JSON-LD document and passes each of its graphs to
    ``self._graph_to_rdf``.

    Nothing is returned and the return value of ``_graph_to_rdf`` is ignored,
    so any output has to be produced by ``_graph_to_rdf`` as a side effect.

    :param input_: the JSON-LD input.
    :param options: the options to use; may be empty or None. The caller's
      dictionary is copied, never modified. Defaults filled in here:
      [base] the base IRI to use (default: the input itself when it is a
        string, otherwise '').
      [produceGeneralizedRdf] (default: False).
      [documentLoader(url, options)] the document loader
        (default: _default_document_loader).
      [contextResolver] internal use only.
      [extractAllScripts] (default: True).
      [processingMode] (default: 'json-ld-1.1').
      Any other keys are handed through unchanged to ``self.expand`` and
      ``self._graph_to_rdf``.

    :raises JsonLdError: with type 'jsonld.RdfError' when expansion fails.
    """
    # private copy with defaults applied
    opts = options.copy() if options else {}
    simple_defaults = (
        ("base", input_ if _is_string(input_) else ""),
        ("produceGeneralizedRdf", False),
        ("documentLoader", _default_document_loader),
    )
    for key, fallback in simple_defaults:
        opts.setdefault(key, fallback)

    # the resolver is built from whichever document loader is now in effect
    resolver = ContextResolver(_resolved_context_cache, opts["documentLoader"])
    opts.setdefault("contextResolver", resolver)
    opts.setdefault("extractAllScripts", True)
    opts.setdefault("processingMode", "json-ld-1.1")

    # expand input
    try:
        expanded_doc = self.expand(input_, opts)
    except JsonLdError as cause:
        raise JsonLdError(
            "Could not expand input before serialization to RDF.",
            "jsonld.RdfError",
            cause=cause,
        ) from cause

    # create node map for default graph (and any named graphs)
    bnode_issuer = IdentifierIssuer("_:b")
    graphs = {"@default": {}}
    self._create_node_map(expanded_doc, graphs, "@default", bnode_issuer)

    # output RDF dataset, one graph at a time in sorted name order
    for name, graph_nodes in sorted(graphs.items()):
        # skip graphs named by relative IRIs
        if name != "@default" and not _is_absolute_iri(name):
            continue
        self._graph_to_rdf(name, graph_nodes, bnode_issuer, opts)

0 commit comments

Comments
 (0)