# -*- coding: utf-8 -*-
'''
This module contains classes that implement
:class:`skosprovider.providers.VocabularyProvider` against the LOD version of
the Getty Vocabularies (AAT, TGN and ULAN).

.. note::

    | At initialisation, the Getty providers look up which gvp-classes of the gvp-ontology are subclasses of skos-classes.
    | This can cause a delay of several seconds at startup.
'''
import requests
import warnings
import logging
from language_tags import tags
from requests.exceptions import ConnectionError
from skosprovider.exceptions import ProviderUnavailableException
from skosprovider.providers import VocabularyProvider
from skosprovider_getty.utils import (
uri_to_id, uri_to_graph,
conceptscheme_from_uri,
things_from_graph,
SubClassCollector,
GVP
)
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class GettyProvider(VocabularyProvider):
    """A provider that can work with the GETTY rdf files of
    http://vocab.getty.edu/

    Single concepts and collections are read from ``<url>/<id>.rdf``;
    searches and hierarchy lookups are sent as SPARQL queries to
    ``<base_url>sparql.json``.
    """

    # SPARQL FILTER expressions used to restrict results to a SKOS type.
    _CONCEPT_FILTER = "(?Type = skos:Concept)"
    _COLLECTION_FILTER = "(?Type = skos:Collection)"
    _ANY_TYPE_FILTER = "((?Type = skos:Concept) || (?Type = skos:Collection))"

    def __init__(self, metadata, **kwargs):
        """ Constructor of the :class:`skosprovider_getty.providers.GettyProvider`

        :param (dict) metadata: metadata of the provider. The keys
            `default_language` (``'en'``) and `subject` (``[]``) are added to
            this dict when they are missing.
        :param kwargs: arguments defining the provider.

            * Typical arguments are `base_url`, `vocab_id` and `url`.
              The `url` is a composition of the `base_url` and `vocab_id`
            * You can also pass a custom
              :class:`skosprovider_getty.utils.SubClassCollector`
              to override default behaviour with the `subclasses` keyword.
            * You can also pass a custom requests session with the `session`
              keyword.
            * The :class:`skosprovider_getty.providers.AATProvider`
              is the default :class:`skosprovider_getty.providers.GettyProvider`
        """
        if 'default_language' not in metadata:
            metadata['default_language'] = 'en'
        if 'subject' not in metadata:
            metadata['subject'] = []
        self.metadata = metadata
        self.base_url = kwargs.get('base_url', 'http://vocab.getty.edu/')
        self.vocab_id = kwargs.get('vocab_id', 'aat')
        self.url = kwargs.get('url', self.base_url + self.vocab_id)
        # Only build the default collector / session when the caller did not
        # supply one, so that no throw-away objects are constructed.
        if 'subclasses' in kwargs:
            self.subclasses = kwargs['subclasses']
        else:
            self.subclasses = SubClassCollector(GVP)
        if 'session' in kwargs:
            self.session = kwargs['session']
        else:
            self.session = requests.Session()

    @property
    def concept_scheme(self):
        """The concept scheme of this provider.

        The value is rebuilt through :meth:`_get_concept_scheme` on every
        access; nothing is cached here.
        """
        return self._get_concept_scheme()

    def _get_concept_scheme(self):
        """Build the concept scheme from the provider `url`, using the
        provider session.
        """
        return conceptscheme_from_uri(
            self.url,
            session=self.session
        )

    def _get_language(self, **kwargs):
        """Return the `language` keyword argument when it is given, otherwise
        the `default_language` of the provider metadata.
        """
        if 'language' in kwargs:
            return kwargs['language']
        return self.metadata['default_language']

    def get_by_id(self, id, change_notes=False):
        """ Get a :class:`skosprovider.skos.Concept` or :class:`skosprovider.skos.Collection` by id

        :param (str) id: id of the :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`
        :param (bool) change_notes: accepted for interface compatibility;
            not used by this implementation.
        :return: corresponding :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`.
            Returns `False` if the data could not be retrieved or if nothing
            was found for this id.
        """
        rdf_url = '%s/%s.rdf' % (self.url, id)
        graph = uri_to_graph(rdf_url, session=self.session)
        if graph is False:
            log.debug('Failed to retrieve data for %s', rdf_url)
            return False
        # get the concept
        things = things_from_graph(graph, self.subclasses, self.concept_scheme)
        if len(things) == 0:
            return False
        return things[0]

    def get_by_uri(self, uri, change_notes=False):
        """ Get a :class:`skosprovider.skos.Concept` or :class:`skosprovider.skos.Collection` by uri

        The uri is converted to an id and the lookup is delegated to
        :meth:`get_by_id`.

        :param (str) uri: string uri of the :class:`skosprovider.skos.Concept`
            or :class:`skosprovider.skos.Collection`
        :param (bool) change_notes: passed on to :meth:`get_by_id`.
        :return: corresponding :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`.
            Returns `False` under the same conditions as :meth:`get_by_id`.
        """
        id = uri_to_id(uri)
        return self.get_by_id(id, change_notes)

    def find(self, query, **kwargs):
        '''Find concepts that match a certain query.

        Currently query is expected to be a dict, so that complex queries can
        be passed. You can use this dict to search for concepts or collections
        with a certain label, with a certain type and for concepts that belong
        to a certain collection.

        .. code-block:: python

            # Find anything that has a label of church.
            provider.find({'label': 'church'})

            # Find all concepts that are a part of collection 5.
            provider.find({'type': 'concept', 'collection': {'id': 5}})

            # Find all concepts, collections or children of these
            # that belong to collection 5.
            provider.find({'collection': {'id': 5, 'depth': 'all'}})

        :param query: A dict that can be used to express a query. The following
            keys are permitted:

            * `label`: Search for something with this label value. An empty
              label is equal to searching for all concepts.
            * `type`: Limit the search to certain SKOS elements. If not
              present `all` is assumed:

                * `concept`: Only return :class:`skosprovider.skos.Concept`
                  instances.
                * `collection`: Only return
                  :class:`skosprovider.skos.Collection` instances.
                * `all`: Return both :class:`skosprovider.skos.Concept` and
                  :class:`skosprovider.skos.Collection` instances.
            * `collection`: Search only for concepts belonging to a certain
              collection. This argument should be a dict with two keys:

                * `id`: The id of a collection. Required.
                * `depth`: Can be `members` or `all`. Optional. If not
                  present, `members` is assumed, meaning only concepts or
                  collections that are a direct member of the collection
                  should be considered. When set to `all`, this method
                  should return concepts and collections that are a member
                  of the collection or are a narrower concept of a member
                  of the collection.

        :raises ValueError: if `type`, or the `depth` of `collection`, has a
            value that is not allowed, or if `collection` has no `id`.
        :returns: A :class:`list` of concepts and collections. Each of these
            is a dict with the following keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: A label to represent the concept or collection. It is
              determined by looking at the `**kwargs` parameter, the default
              language of the provider and finally falls back to `en`.
        '''
        # Interpret and validate the query parameters (label, type, collection)
        label = query.get('label')

        # Type: 'collection', 'concept' or 'all'
        type_c = query.get('type', 'all')
        if type_c not in ('all', 'concept', 'collection'):
            raise ValueError("type: only the following values are allowed: 'all', 'concept', 'collection'")

        # Collection to search in (optional)
        coll_id = None
        coll_depth = None
        if 'collection' in query:
            coll = query['collection']
            if 'id' not in coll:
                raise ValueError("collection: 'id' is required key if a collection-dictionary is given")
            coll_id = coll['id']
            coll_depth = coll.get('depth', 'members')
            if coll_depth not in ('members', 'all'):
                raise ValueError(
                    "collection - 'depth': only the following values are allowed: 'members', 'all'")

        # Build the SPARQL query
        coll_x = ""
        if coll_id is not None:
            if coll_depth == 'all':
                predicate = "gvp:broaderExtended"
            else:
                predicate = "gvp:broader"
            # %-formatting (not '+') so that integer ids are accepted as well.
            coll_x = "%s %s:%s;" % (predicate, self.vocab_id, coll_id)

        if type_c == 'concept':
            type_values = self._CONCEPT_FILTER
        elif type_c == 'collection':
            type_values = self._COLLECTION_FILTER
        else:
            type_values = self._ANY_TYPE_FILTER

        sparql = """
            SELECT ?Subject ?Term ?Type ?Id (lang(?Term) as ?Lang) {
                ?Subject rdf:type ?Type; dc:identifier ?Id; %s skos:inScheme %s:; %s.
                OPTIONAL {
                    {?Subject xl:prefLabel [skosxl:literalForm ?Term]}
                }
                FILTER(%s)
            }""" % (self._build_keywords(label), self.vocab_id, coll_x, type_values)
        return self._get_sorted_answer(sparql, **kwargs)

    def get_all(self, **kwargs):
        """
        Not supported: This provider does not support this. The amount of results is too large

        Emits a :class:`UserWarning`.

        :returns: Always `False`.
        """
        warnings.warn(
            'This provider does not support this. The amount of results is too large',
            UserWarning
        )
        return False

    def _get_sorted_answer(self, query, **kwargs):
        """Run a SPARQL query through :meth:`_get_answer` and sort the result.

        The sort key and order are taken from `**kwargs` by `_get_sort` and
        `_get_sort_order`, which are not defined in this class (presumably
        inherited from :class:`skosprovider.providers.VocabularyProvider`).

        :param (str) query: Sparql query
        :returns: The sorted :class:`list` of result dicts.
        """
        ret = self._get_answer(query, **kwargs)
        language = self._get_language(**kwargs)
        sort = self._get_sort(**kwargs)
        sort_order = self._get_sort_order(**kwargs)
        return self._sort(ret, sort, language, sort_order == 'desc')

    def _get_answer(self, query, **kwargs):
        """ Returns the results of the Sparql query to a :class:`list` of concepts and collections.
        The return :class:`list` can be empty.

        A subject can occur several times in the raw result, once per label.
        Exactly one label per subject is kept, chosen by this precedence:

        1. the language tag equals the requested language
           (see :meth:`_get_language`);
        2. the primary language equals the primary language of the
           provider's default language;
        3. the language tag is `en`;
        4. anything else.

        When several labels share the best precedence, the one that comes
        first in the result is kept.

        :param (str) query: Sparql query
        :raises ProviderUnavailableException: if the request fails with a
            connection error or the service answers with status code 404.
            Other status codes are not checked here.
        :returns: A :class:`list` of concepts and collections. Each of these
            is a dict with the following keys:

            * id: id within the conceptscheme
            * uri: :term:`uri` of the concept or collection
            * type: concept or collection
            * label: A label to represent the concept or collection.
            * lang: language tag of the label (empty string if unknown)
        """
        # send request to getty
        request = self.base_url + "sparql.json"
        try:
            res = self.session.get(request, params={"query": query})
        except ConnectionError:
            raise ProviderUnavailableException("Request could not be executed - Request: %s - Params: %s" % (request, query))
        if res.status_code == 404:
            raise ProviderUnavailableException("Service not found (status_code 404) - Request: %s - Params: %s" % (request, query))
        if not res.encoding:
            res.encoding = 'utf-8'
        r = res.json()

        # Parse the loop-invariant language tags once instead of once per row.
        wanted_format = tags.tag(self._get_language(**kwargs)).format
        default_language = tags.tag(self._get_language()).language
        default_primary = default_language.format if default_language else None
        english_format = tags.tag('en').format

        def rank(lang):
            # Higher is better; see the precedence list in the docstring.
            tag = tags.tag(lang)
            if tag.format == wanted_format:
                return 3
            language = tag.language
            if language and default_primary is not None \
                    and language.format == default_primary:
                return 2
            if tag.format == english_format:
                return 1
            return 0

        best = {}
        best_rank = {}
        for result in r["results"]["bindings"]:
            uri = result["Subject"]["value"]
            if "Term" in result:
                label = result["Term"]["value"]
            else:
                label = "<not available>"
            # ?Term sits in an OPTIONAL clause, so ?Lang may be absent too.
            lang = result.get("Lang", {}).get("value", "")
            item = {
                'id': result["Id"]["value"],
                'uri': uri,
                'type': result["Type"]["value"].rsplit('#', 1)[1],
                'label': label,
                'lang': lang
            }
            item_rank = rank(lang)
            # Replace only on a strictly better rank, so the outcome does not
            # depend on where a lower ranked label sits in the result.
            if uri not in best or item_rank > best_rank[uri]:
                best[uri] = item
                best_rank[uri] = item_rank
        return list(best.values())

    def _get_top(self, type='All', **kwargs):
        """ Returns all top-level facets. The returned values depend on the given type:
        Concept or All (Concepts and Collections). Default All is used.

        :param (str) type: `concepts` to return only concepts; any other
            value returns both concepts and collections.
        :return: A :class:`list` of concepts (and collections).
        """
        if type == "concepts":
            type_values = self._CONCEPT_FILTER
        else:
            type_values = self._ANY_TYPE_FILTER
        query = """SELECT ?Subject ?Id ?Type ?Term (lang(?Term) as ?Lang)
            {
                ?Subject a gvp:Facet; rdf:type ?Type;
                dc:identifier ?Id; skos:inScheme %s:;.
                OPTIONAL {
                    {?Subject xl:prefLabel [skosxl:literalForm ?Term]}
                }
                FILTER (%s)
            }""" % (self.vocab_id, type_values)
        return self._get_sorted_answer(query, **kwargs)

    def get_top_concepts(self, **kwargs):
        """ Returns all concepts that form the top-level of a display hierarchy.

        :return: A :class:`list` of concepts.
        """
        return self._get_top("concepts", **kwargs)

    def get_top_display(self, **kwargs):
        """ Returns all concepts or collections that form the top-level of a display hierarchy.

        :return: A :class:`list` of concepts and collections.
        """
        return self._get_top(**kwargs)

    def get_children_display(self, id, **kwargs):
        """ Return a list of concepts or collections that should be displayed under this concept or collection.

        :param str id: A concept or collection id.
        :returns: A :class:`list` of concepts and collections.
        """
        query = """SELECT ?Subject ?Id ?Type ?Term (lang(?Term) as ?Lang)
            {
                ?Subject rdf:type ?Type;
                dc:identifier ?Id; skos:inScheme %s:; gvp:%s %s:%s;.
                OPTIONAL {
                    {?Subject xl:prefLabel [skosxl:literalForm ?Term]}
                }
                FILTER(%s)
            }""" % (self.vocab_id, 'broader', self.vocab_id, id,
                    self._ANY_TYPE_FILTER)
        return self._get_sorted_answer(query, **kwargs)

    def expand(self, id):
        """ Expand a concept or collection to all its narrower concepts.

        If the id passed belongs to a :class:`skosprovider.skos.Concept`,
        the id of the concept itself should be included in the return value.

        :param str id: A concept or collection id.
        :returns: A :class:`list` of ids. Returns `False` if the query yields
            no ids and :meth:`get_by_id` returns `False` for the input id.
        """
        # %-formatting (not '+') so that integer ids are accepted as well.
        subject = "%s:%s" % (self.vocab_id, id)
        query = """SELECT DISTINCT ?Id{
            {
                ?Subject dc:identifier ?Id; skos:inScheme %s:; gvp:broaderExtended %s;.
            }
            UNION
            {
                VALUES ?Id {'%s'}
                ?Subject dc:identifier ?Id; skos:inScheme %s:; rdf:type skos:Concept.
            }
            }
            """ % (self.vocab_id, subject, id, self.vocab_id)
        log.debug(query)
        # NOTE(review): unlike _get_answer, connection errors are not
        # translated into ProviderUnavailableException here.
        res = self.session.get(self.base_url + "sparql.json", params={"query": query})
        res.encoding = 'utf-8'
        r = res.json()
        result = [binding['Id']['value'] for binding in r['results']['bindings']]
        if len(result) == 0 and self.get_by_id(id) is False:
            return False
        return result

    def _build_keywords(self, label):
        """Translate a label into the `luc:term` clause of a SPARQL query.

        The label is split on whitespace and the resulting words are combined
        with `AND`.

        :param (str) label: the label to search for, or `None`.
        :returns: The clause, e.g. ``luc:term 'gothic AND church';``, or an
            empty string when the label is `None` or contains no words.
        """
        if label is None:
            return ""
        # split() without argument drops empty words caused by leading,
        # trailing or repeated whitespace.
        words = label.split()
        if not words:
            return ""
        # Escape backslashes and single quotes so the label cannot terminate
        # the single-quoted SPARQL literal it is embedded in.
        escaped = [
            word.replace("\\", "\\\\").replace("'", "\\'") for word in words
        ]
        return "luc:term '" + " AND ".join(escaped) + "';"

    def _sort(self, items, sort, language='en', reverse=False):
        """Sort a list of result dicts in place.

        :param (list) items: the dicts to sort.
        :param (str) sort: key to sort on. `None` means `id`; `sortlabel` is
            treated as `label`.
        :param (str) language: not used by this implementation.
        :param (bool) reverse: sort in descending order when true.
        :returns: The same list object, sorted.
        """
        if sort is None:
            sort = 'id'
        if sort == 'sortlabel':
            sort = 'label'
        items.sort(key=lambda item: item[sort], reverse=reverse)
        return items
class AATProvider(GettyProvider):
    """ The Art & Architecture Thesaurus Provider

    A provider that can work with the GETTY AAT rdf files of
    http://vocab.getty.edu/aat
    """

    def __init__(self, metadata, **kwargs):
        """ Set up a :class:`GettyProvider` for http://vocab.getty.edu/aat

        :param (dict) metadata: metadata of the provider
        :param kwargs: passed on to :class:`GettyProvider`. `base_url` and
            `vocab_id` are fixed by this class; passing either of them here
            raises a :class:`TypeError`.
        """
        GettyProvider.__init__(
            self, metadata,
            vocab_id='aat', base_url='http://vocab.getty.edu/', **kwargs
        )
class TGNProvider(GettyProvider):
    """ The Getty Thesaurus of Geographic Names

    A provider that can work with the GETTY TGN rdf files of
    http://vocab.getty.edu/tgn
    """

    def __init__(self, metadata, **kwargs):
        """ Set up a :class:`GettyProvider` for http://vocab.getty.edu/tgn

        :param (dict) metadata: metadata of the provider
        :param kwargs: passed on to :class:`GettyProvider`. `base_url` and
            `vocab_id` are fixed by this class; passing either of them here
            raises a :class:`TypeError`.
        """
        GettyProvider.__init__(
            self, metadata,
            vocab_id='tgn', base_url='http://vocab.getty.edu/', **kwargs
        )
class ULANProvider(GettyProvider):
    """ Union List of Artist Names

    A provider that can work with the GETTY ULAN rdf files of
    http://vocab.getty.edu/ulan
    """

    def __init__(self, metadata, **kwargs):
        """ Set up a :class:`GettyProvider` for http://vocab.getty.edu/ulan

        :param (dict) metadata: metadata of the provider
        :param kwargs: passed on to :class:`GettyProvider`. `base_url` and
            `vocab_id` are fixed by this class; passing either of them here
            raises a :class:`TypeError`.
        """
        GettyProvider.__init__(
            self, metadata,
            vocab_id='ulan', base_url='http://vocab.getty.edu/', **kwargs
        )