|
| 1 | +from io import StringIO, BufferedReader |
| 2 | +import json |
| 3 | +from typing import Union |
| 4 | + |
| 5 | +import pyld |
| 6 | +from pyld import jsonld |
| 7 | +from pyld.jsonld import RDF_TYPE, _is_keyword, _is_absolute_iri |
| 8 | + |
| 9 | +from rdflib import BNode, Graph, Literal, URIRef |
| 10 | +from rdflib.graph import DATASET_DEFAULT_GRAPH_ID |
| 11 | +from rdflib.parser import Parser, InputSource, BytesIOWrapper, PythonInputSource |
| 12 | + |
| 13 | +from .to_rdf import to_rdf |
| 14 | + |
| 15 | + |
| 16 | +# Monkey patch pyld: replace JsonLdProcessor.to_rdf with the to_rdf imported from .to_rdf above. |
| 17 | +pyld.jsonld.JsonLdProcessor.to_rdf = to_rdf |
| 18 | + |
| 19 | + |
class JSONLDParser(Parser):
    """rdflib ``Parser`` that reads JSON-LD via pyld and writes the resulting
    triples directly into the sink's store."""

    @staticmethod
    def _term_to_rdflib(term: dict) -> Union[URIRef, BNode, Literal]:
        """
        Convert a pyld RDF term dict into the matching rdflib term.

        :param term: dict with a ``type`` and ``value`` key; literal terms may
            also carry ``datatype`` and ``language``.

        :return: a ``URIRef`` for ``"IRI"``, a ``BNode`` for ``"blank node"``
            and a ``Literal`` for anything else.
        """
        kind = term["type"]
        value = term["value"]
        if kind == "IRI":
            return URIRef(value)
        if kind == "blank node":
            # Drop the leading "_:" so only the label is used.
            return BNode(value[2:])

        language = term.get("language")
        if language:
            # Keep the language tag. rdflib's Literal does not accept both
            # ``lang`` and ``datatype``, so the datatype is left out here.
            return Literal(value, lang=language)
        datatype = term.get("datatype")
        return Literal(value, datatype=URIRef(datatype) if datatype else None)

    @staticmethod
    def _graph_name_to_rdflib(pyld_graph_name: str) -> Union[URIRef, BNode]:
        """
        Map a pyld graph name to the identifier passed to the store.

        :param pyld_graph_name: ``"@default"``, a ``_:``-prefixed blank node
            label, or an IRI.

        :return: ``DATASET_DEFAULT_GRAPH_ID`` for ``"@default"``, otherwise a
            ``BNode`` or ``URIRef``.
        """
        if pyld_graph_name == "@default":
            return DATASET_DEFAULT_GRAPH_ID
        if pyld_graph_name.startswith("_:"):
            return BNode(pyld_graph_name[2:])
        return URIRef(pyld_graph_name)

    def parse(self, source: InputSource, sink: Graph) -> None:
        """
        Parse JSON-LD from ``source`` and add the resulting triples to
        ``sink.store``.

        :param source: the input; a ``PythonInputSource`` supplies already
            decoded data through ``source.data``, any other source is read
            from ``source.getByteStream()`` and decoded with ``json.loads``.
        :param sink: the graph whose store receives the triples.

        :raises TypeError: if the byte stream is not a ``StringIO``,
            ``BytesIOWrapper`` or ``BufferedReader``.
        """
        # TODO: Do we need to set up a document loader?
        # See https://github.com/digitalbazaar/pyld#document-loader
        # Using a document loader requires either Requests or aiohttp

        to_term = JSONLDParser._term_to_rdflib

        def _graph_to_rdf(
            self: pyld.jsonld.JsonLdProcessor,
            pyld_graph_name: str,
            pyld_graph_dict: dict,
            issuer: pyld.jsonld.IdentifierIssuer,
            options: dict,
        ) -> None:
            """
            Adds the RDF triples of the given graph to ``sink.store``.

            :param pyld_graph_name: the graph name of the triples.
            :param pyld_graph_dict: the graph to create RDF triples for.
            :param issuer: the IdentifierIssuer for issuing blank node identifiers.
            :param options: the RDF serialization options.

            :return: None; triples are written to the sink instead of being
                collected and returned.
            """
            # These depend only on the arguments, so compute them once.
            graph_name = JSONLDParser._graph_name_to_rdflib(pyld_graph_name)
            rdf_direction = options.get("rdfDirection")
            generalized = options.get("produceGeneralizedRdf", False)

            # Handed to _object_to_rdf on every call.
            # NOTE(review): pyld appears to append the rdf:first/rdf:rest
            # triples of @list values to this list as
            # {"subject", "predicate", "object"} term dicts; confirm against
            # the pyld version in use.
            triples: list = []

            for id_, node in sorted(pyld_graph_dict.items()):
                # skip relative IRI subjects
                if not _is_absolute_iri(id_):
                    continue
                subject = BNode(id_[2:]) if id_.startswith("_:") else URIRef(id_)

                for prop, items in sorted(node.items()):
                    if prop == "@type":
                        prop = RDF_TYPE
                    elif _is_keyword(prop):
                        continue

                    # skip relative IRI predicates
                    if not _is_absolute_iri(prop):
                        continue

                    if prop.startswith("_:"):
                        # skip bnode predicates unless producing
                        # generalized RDF
                        if not generalized:
                            continue
                        predicate = BNode(prop[2:])
                    else:
                        predicate = URIRef(prop)

                    for item in items:
                        # convert list, value or node object to triple
                        obj = self._object_to_rdf(
                            item, issuer, triples, rdf_direction
                        )
                        # skip None objects (they are relative IRIs)
                        if obj is None:
                            continue
                        sink.store.add(
                            (subject, predicate, to_term(obj)), graph_name
                        )

            # Flush whatever _object_to_rdf collected in ``triples`` so those
            # statements reach the sink too instead of being discarded.
            for triple in triples:
                parts = (
                    triple.get("subject"),
                    triple.get("predicate"),
                    triple.get("object"),
                )
                if any(part is None for part in parts):
                    continue
                sink.store.add(
                    tuple(to_term(part) for part in parts), graph_name
                )

        # Monkey patch pyld.
        # NOTE(review): this rebinds a class attribute to a closure over
        # ``sink`` and is never restored, so the patch (and the reference to
        # ``sink``) outlives this call, and overlapping parse() calls would
        # overwrite each other's patch.
        pyld.jsonld.JsonLdProcessor._graph_to_rdf = _graph_to_rdf

        if isinstance(source, PythonInputSource):
            jsonld.to_rdf(source.data)
            return

        stream = source.getByteStream()
        if not isinstance(stream, (StringIO, BytesIOWrapper, BufferedReader)):
            raise TypeError(f"Unhandled type for 'stream' as {type(stream)}.")

        # Read and decode inside the try so the stream is closed even when
        # reading or JSON decoding fails.
        try:
            data = json.loads(stream.read())
            jsonld.to_rdf(data)
        finally:
            stream.close()
0 commit comments