Source code for clldutils.metadata

"""
JSON-LD - the serialization format used for metadata in CLDF datasets - supports nested data.
To make creating (and reading) this data simpler, this module provides a Python API to build data
structures which "know" how to read from/serialize to JSON-LD.

Usage:

.. code-block:: python

    >>> from clldutils.metadata import *
    >>> md = Metadata(
    ...     title='The Data',
    ...     publisher=Publisher(name='Data Press', place='anywhere'),
    ...     license=License(name='CC-BY-4.0'))
    >>> md.to_jsonld()['dc:license']
    OrderedDict([('name', 'Creative Commons Attribution 4.0'),
                 ('url', 'https://creativecommons.org/licenses/by/4.0/'),
                 ('icon', 'cc-by.png')])
    >>> Metadata.from_jsonld(md.to_jsonld()).publisher.place
    'anywhere'
"""
import collections
import urllib.parse

import attr

from clldutils import licenses

__all__ = ['Publisher', 'License', 'Metadata']


@attr.s
class Publisher:
    """
    Describes who publishes a dataset.

    Every attribute is optional and defaults to ``None``. The ``ldkey`` entry in a field's
    metadata is the property name under which the value is stored when the publisher is
    written to (or read from) a JSON-LD object.

    :ivar name: The publisher's name.
    :ivar place: The publisher's place or address, as used in "traditional" publisher formats.
    :ivar url: Link to the publisher's "homepage".
    :ivar contact: Email address at which the publisher of a dataset can be reached.
    """
    name = attr.ib(default=None, metadata={'ldkey': "http://xmlns.com/foaf/0.1/name"})
    place = attr.ib(default=None, metadata={'ldkey': "dc:Location"})
    url = attr.ib(default=None, metadata={'ldkey': "http://xmlns.com/foaf/0.1/homepage"})
    contact = attr.ib(default=None, metadata={'ldkey': "http://xmlns.com/foaf/0.1/mbox"})
@attr.s
class License:
    """
    Describes the license a dataset is published under, by name, URL and icon.

    All three attributes default to the values for the Creative Commons Attribution 4.0
    International License.
    """
    name = attr.ib(default="Creative Commons Attribution 4.0 International License")
    url = attr.ib(default="https://creativecommons.org/licenses/by/4.0/")
    icon = attr.ib(default="cc-by.png")

    def __attrs_post_init__(self):
        # When ``licenses.find`` recognises the given name, replace name and URL with the
        # values of the matched license. The icon is left as passed in.
        known = licenses.find(self.name)
        if not known:
            return
        self.name, self.url = known.name, known.url
@attr.s
class Metadata:
    """
    Metadata about the published version(s) of a dataset.

    :ivar Publisher publisher: The organisation or institution publishing the dataset.
    :ivar License license: The license under which the dataset can be used.
    :ivar str url: A URL under which the dataset can be browsed.
    :ivar str title: The title of the dataset.
    :ivar str description: A description of the dataset.
    """
    # (JSON-LD key, attribute name) pairs for the plain-valued attributes.
    _SIMPLE_PROPERTIES = (
        ('dcat:accessURL', 'url'),
        ('dc:title', 'title'),
        ('dc:description', 'description'),
    )
    # (JSON-LD key, class) pairs for the nested objects. The part of the key after the colon
    # is also the name of the attribute holding the object.
    _NESTED_PROPERTIES = (
        ('dc:publisher', Publisher),
        ('dc:license', License),
    )

    # Factories rather than pre-built instances: Publisher and License objects are mutable, so
    # a single default instance would be shared (and could be modified) across all Metadata
    # objects created without explicit values.
    publisher = attr.ib(
        default=attr.Factory(Publisher),
        validator=attr.validators.instance_of(Publisher))
    license = attr.ib(
        default=attr.Factory(License),
        validator=attr.validators.instance_of(License))
    url = attr.ib(default=None)
    title = attr.ib(default=None)
    description = attr.ib(default=None)

    @classmethod
    def from_jsonld(cls, d, defaults=None):
        """
        Create a `Metadata` object from a JSON-LD object.

        :param d: `dict` as returned by `Metadata.to_jsonld`.
        :param defaults: Optional `dict` of fallback values, used when `d` has no (truthy) \
        value for a property. Keys are attribute names (``'title'``) for the plain attributes \
        and dotted names (``'publisher.name'``, ``'license.url'``) for nested ones.
        :return: `Metadata` instance.
        """
        defaults = defaults or {}
        kw = {}
        for ldkey, name in cls._SIMPLE_PROPERTIES:
            val = d.get(ldkey) or defaults.get(name)
            if val:
                kw[name] = val

        for ldkey, cls_ in cls._NESTED_PROPERTIES:
            prefix = ldkey.split(':')[1]
            # ``or {}`` also covers a key that is present but null.
            dd = d.get(ldkey) or {}
            ckw = {}
            for f in attr.fields(cls_):
                val = dd.get(f.metadata.get('ldkey', f.name)) \
                    or defaults.get('{0}.{1}'.format(prefix, f.name))
                # Falsy values are dropped, so the nested class' own defaults apply.
                if val:
                    ckw[f.name] = val
            kw[cls_.__name__.lower()] = cls_(**ckw)
        return cls(**kw)

    def to_jsonld(self) -> collections.OrderedDict:
        """
        Returns a `dict` suitable for serialization as JSON-LD object, with the metadata tagged
        with suitable common properties.

        Attributes with falsy values are omitted; the ``dc:publisher`` and ``dc:license``
        objects are always included, even if empty.
        """
        items = [("@context", ["http://www.w3.org/ns/csvw", {"@language": "en"}])]
        for ldkey, name in self._SIMPLE_PROPERTIES:
            value = getattr(self, name)
            if value:
                items.append((ldkey, value))

        for ldkey, cls_ in self._NESTED_PROPERTIES:
            obj = getattr(self, ldkey.split(':')[1])
            nested = collections.OrderedDict()
            for f in attr.fields(cls_):
                value = getattr(obj, f.name)
                if value:
                    # Fields without an explicit ``ldkey`` are stored under their own name.
                    nested[f.metadata.get('ldkey', f.name)] = value
            items.append((ldkey, nested))
        return collections.OrderedDict(items)

    @property
    def domain(self):
        """
        The network location (host) part of `url`; the empty string if no URL is set.
        """
        # Falling back to '' keeps the result a `str`: `urlparse(None)` would yield bytes.
        return urllib.parse.urlparse(self.url or '').netloc