diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..aa21393
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,13 @@
+Copyright 2011 Ronan Klyne
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..c62a927
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,4 @@
+include config.ini
+include LICENSE.txt
+recursive-include graphite/Jena-2.6.4 *.txt *.jar
+recursive-include graphite *.txt *.ini
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..e2dd253
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,19 @@
+
+
+Overview
+========
+
+Information on this project is currently being maintained at https://github.com/rklyne/python-graphite
+
+All comments gratefully received at python-graphite@rklyne.net
+
+
+Dependencies
+============
+This package depends on Python >=2.6 and JPype - http://sourceforge.net/projects/jpype/.
+
+Installation command:
+=====================
+
+python setup.py install
+
diff --git a/_cgi.py b/examples/_cgi.py
old mode 100644
new mode 100755
similarity index 100%
rename from _cgi.py
rename to examples/_cgi.py
diff --git a/examples/explorer.py b/examples/explorer.py
new file mode 100755
index 0000000..4427394
--- /dev/null
+++ b/examples/explorer.py
@@ -0,0 +1,328 @@
+
+import cgi
+import cgitb
+cgitb.enable()
+
+fs = cgi.FieldStorage()
+
+class Response(object):
+ def __call__(self, data, type="text/html"):
+ data = str(data)
+ import sys
+ w = sys.stdout.write
+ def wl(text):
+ w(text+"\n")
+ wl("Status: 200 OK")
+ wl("Content-Type: " + type)
+ wl('')
+ w(data)
+ sys.stdout.flush()
+ import time
+ time.sleep(1)
+ sys.exit()
+
+import rdfgraph
+
+def main():
+ respond = Response()
+ import os
+
+ path = os.environ.get('PATH_INFO', '')
+ if path.endswith('/schema'):
+ return schema_explorer(respond)
+
+ if 'type' in fs:
+ show_data(respond)
+ else:
+ landing_page(respond)
+
+HTML = """
+
+
+Semantic web explorer
+
+
+
+
+
+%s
+
+
+"""
+
+def landing_page(respond):
+ respond(HTML % (
+ """RDF data explorer
+ This aims to provide the most useful summary possible of any RDF data source.
+
+ """
+ ))
+
+
+def chart(data, caption="", threshold=0.95):
+ "Renders an HTML chart. Data maps name to value."
+ lst = [(c, p) for (p, c) in data.items()]
+ lst.sort()
+ lst.reverse()
+ total = sum([c for (c, p) in lst])
+ if total == 0:
+ return caption+" No data!"
+ target = total * threshold
+ total = 0
+ for i, (c, p) in enumerate(lst):
+ total += c
+ if total > target:
+ break
+ i += 1
+ i = min(i, 8)
+ if i != len(lst):
+ extras = lst[i:]
+ extras_total = sum([c for (c, p) in extras])
+ lst = lst[:i]
+ lst.append((extras_total, "Others"))
+
+
+ return '\n'+caption+'' \
+ + ' | Count |
' \
+ + '
\n'.join(['| %s | %s | ' % (p, c) for (c, p) in lst]) \
+ + '
'
+
+
+def show_data(respond):
+ g = rdfgraph.Graph()
+ data_type = fs['type'].value
+ url = fs['url'].value
+
+ if url is None:
+ raise RuntimeError
+ if data_type == 'sparql':
+ g.add_endpoint(url)
+ elif data_type == 'http':
+ format = None
+ if url.endswith('.ttl'):
+ format = 'ttl'
+ g.load(url, format=format)
+ else:
+ return landing_page(respond)
+
+ def quote(thing):
+ return cgi.escape(unicode(thing))
+
+ result = ''
+ g.describe("<%s>" % (url,))
+
+ resource_count = int(g.sparql(" SELECT ( COUNT ( DISTINCT ?x ) AS ?c ) WHERE { ?x ?y ?z } ").count('c'))
+ property_count = int(g.sparql("select (count(distinct ?y) as ?c) where {?x ?y ?z}").count('c'))
+ object_count = int(g.sparql("select (count(distinct ?z) as ?c) where {?x ?y ?z}").count('c'))
+ triple_count = int(g.sparql("select (count(?z) as ?c) where {?x ?y ?z}").count('c'))
+ type_count = int(g.sparql("select (count(distinct ?z) as ?c) where {?x a ?z}").count('c'))
+ typed_resource_count = int(g.sparql("select (count(distinct ?x) as ?c) where {?x a ?z}").count('c'))
+
+ actions = [
+ (0, 'triples'),
+ (type_count, 'type'),
+ (property_count, 'property'),
+ (object_count, 'object'),
+ (resource_count, 'resource'),
+ ]
+ actions.sort()
+
+ for weight, action in actions:
+ if weight > 150:
+ result += "Too many %ss to summarise
"%action
+ continue
+ else:
+ result += "%s
"%action
+
+ if action == 'triples':
+ result += '' + chart({
+ 'Untyped resources': resource_count-typed_resource_count,
+ 'Typed resources': typed_resource_count,
+ 'Properties': property_count,
+ 'Objects': object_count,
+ }, caption="Unique URI counts", threshold=2)
+
+ explore_typed = False
+ if resource_count:
+ if typed_resource_count/resource_count < 0.1:
+ result += "Less than 10% of resources are typed. Maybe start looking there?
"
+ explore_typed = True
+
+ prop_to_res = resource_count/property_count
+ if prop_to_res < 2:
+ result += "There are nearly as many properties as resources. This is a web.
"
+ if prop_to_res > 5:
+ result += "There are several properties on each resource. This is concentrated information.
"
+
+ result += '''
+ '''
+
+ elif action == 'property':
+ if True:
+ props = dict([
+ (g.shrink_uri(d.get('y', '')), d['c'])
+ for d in
+ g.sparql("select ?y (count(?x) as ?c) where {?x ?y ?z} group by ?y order by desc(?c) limit 10")
+ if 'y' in d
+ ])
+ else:
+ ps = g.sparql("select distinct ?y where {?x ?y ?z} limit 50")['y']
+ props = {}
+ for p in ps:
+ resultlist = g.sparql("select (count(?x) as ?c) where {?x <%s> ?z}" % (p,))
+ c = resultlist.count('c')
+ props[p.shrink_uri()] = c
+ if props:
+ result += chart(props, caption="Property frequencies")
+
+ elif action == 'resource':
+
+ rs = g.sparql("select distinct ?x where {?x ?y ?z} limit 150")['x']
+ result += "" + str(len(list(rs))) + ' - ' + ', '.join(map(quote, rs)) + '
'
+
+
+ elif action == 'type':
+
+###
+ # for d in g.sparql("select ?z (count(distinct ?x) as ?c) where {?x a ?z} group by ?z order by desc(?c) limit 10"):
+ # raise RuntimeError(d)
+###
+
+ if True:
+ types = dict([
+ (d['z'], d['c'])
+ for d in
+ g.sparql("select ?z (count(distinct ?x) as ?c) where {?x a ?z} group by ?z order by desc(?c) limit 10")
+ if 'z' in d
+ ])
+ else:
+ ts = g.sparql("select distinct ?y where {?x a ?y} limit 50")['y']
+ types = {}
+ for t in ts:
+ resultlist = g.sparql("select (count(distinct ?x) as ?c) where {?x a <%s>}" % (t,))
+ c = resultlist.count('c')
+ types[t.short_html()] = int(c)
+ if types:
+ result += "Types (%s total)
\n" %type_count
+ result += chart(types, caption="Type frequencies")
+
+ elif action == 'object':
+ result += "Object summary not written
"
+
+ else:
+ raise RuntimeError("unknown action", action)
+
+ respond(
+ HTML % (
+ "%s
\n" % quote(url)
+ + result,
+ )
+ )
+
+
+
+def schema_explorer(respond):
+ if 'url' not in fs or 'json' not in fs:
+ #raise RuntimeError(fs.getvalue('url', None))
+ respond(HTML %"""RDF Schema explorer
+ (Reset)
+
+
+
+
+
+ Built using python-graphite and JavaScript InfoViz Toolkit
+
""")
+ else:
+ respond(schema_json(), type="application/json")
+
+
+def schema_json():
+ g = rdfgraph.Graph()
+ g.add_inference('schema')
+ url = fs['url'].value
+ prop = None
+ if 'property' in fs:
+ prop = fs['property'].value
+
+ format = 'ttl'
+ if url.endswith('.ttl'):
+ format = 'ttl'
+ g.load(url, format=format)
+ r = g[url]
+
+ if prop:
+ # defunct
+ data = {
+ 'subject': url,
+ 'property': prop,
+ 'values': [{
+ 'id': n.expand_uri(),
+ 'name': n.shrink_uri(),
+ } for n in g[url].all(prop)],
+ }
+
+ ns = r.get_ns()
+
+ import json
+
+ properties = []
+ data = {
+ 'name': r['rdfs:label'],
+ 'properties': properties,
+ }
+ domain_of = list(r.all('-rdfs:domain'))
+ for p in domain_of:
+ for r in p.all('rdfs:range'):
+ properties.append({
+ 'id': str(r),
+ 'name': "(" + p.shrink_uri() + ")" \
+ + r.shrink_uri() if hasattr(r, 'shrink_uri') else r,
+ })
+ return json.dumps(data)
diff --git a/run.py b/examples/run.py
old mode 100644
new mode 100755
similarity index 91%
rename from run.py
rename to examples/run.py
index 2061328..0ef929d
--- a/run.py
+++ b/examples/run.py
@@ -1,7 +1,9 @@
-import rdfgraph
+#!/usr/bin/env python
+
+import graphite
def main():
- e = rdfgraph.Graph()
+ e = graphite.Graph()
uri = "http://webscience.org/person/2.n3"
# uri = 'http://id.ecs.soton.ac.uk/person/1650'
e.load(uri)
@@ -19,7 +21,7 @@ def main():
print "People"
uri = "http://webscience.org/people.n3"
- g = rdfgraph.Graph().load(uri)
+ g = graphite.Graph().load(uri)
names = []
for person in g.all_of_type('foaf:Person').sort('foaf:family_name'):
print "-"*40
@@ -28,7 +30,7 @@ def main():
print ', '.join(map(str, names))
- print rdfgraph.Graph(). \
+ print graphite.Graph(). \
load("http://webscience.org/people"). \
sparql("PREFIX foaf: SELECT * WHERE { ?person a foaf:Person } LIMIT 5") \
['person']['foaf:name'].join(', ') \
@@ -42,7 +44,7 @@ def main2():
# Try playing with some Linked4 local govt. data
# ( http://linked4.org/lsd/ )
#
- graph = rdfgraph.Graph()
+ graph = graphite.Graph()
graph.load_sparql(
"http://linked4.org/lsd/sparql",
"""
@@ -63,7 +65,7 @@ def main3():
# Try playing with some Linked4 local govt. data
# ( http://linked4.org/lsd/ )
#
- data = rdfgraph.Dataset()
+ data = graphite.Dataset()
data.add_endpoint("http://linked4.org/lsd/sparql")
data.add_endpoint(dbpedia)
# Royal Borough of Windsor and Maidenhead
@@ -82,7 +84,7 @@ def main3():
def main4():
- graph = rdfgraph.Dataset()
+ graph = graphite.Dataset()
graph.add_endpoint("http://services.data.gov.uk/reference/sparql")
# graph.add_endpoint("http://linked4.org/lsd/sparql")
@@ -99,7 +101,7 @@ def main4():
def explore_types():
- graph = rdfgraph.Graph()
+ graph = graphite.Graph()
graph.add_endpoint("http://linked4.org/lsd/sparql")
print graph.all_types().get('rdfs:label').join(', ')
diff --git a/Jena-2.6.4/README.txt b/graphite/Jena-2.6.4/README.txt
similarity index 100%
rename from Jena-2.6.4/README.txt
rename to graphite/Jena-2.6.4/README.txt
diff --git a/Jena-2.6.4/ReleaseNotes.txt b/graphite/Jena-2.6.4/ReleaseNotes.txt
similarity index 100%
rename from Jena-2.6.4/ReleaseNotes.txt
rename to graphite/Jena-2.6.4/ReleaseNotes.txt
diff --git a/Jena-2.6.4/copyright.txt b/graphite/Jena-2.6.4/copyright.txt
similarity index 100%
rename from Jena-2.6.4/copyright.txt
rename to graphite/Jena-2.6.4/copyright.txt
diff --git a/graphite/Jena-2.6.4/lib/arq-2.8.7.jar b/graphite/Jena-2.6.4/lib/arq-2.8.7.jar
new file mode 100644
index 0000000..3040a99
Binary files /dev/null and b/graphite/Jena-2.6.4/lib/arq-2.8.7.jar differ
diff --git a/Jena-2.6.4/lib/arq-2.8.8.jar b/graphite/Jena-2.6.4/lib/arq-2.8.8.jar
similarity index 100%
rename from Jena-2.6.4/lib/arq-2.8.8.jar
rename to graphite/Jena-2.6.4/lib/arq-2.8.8.jar
diff --git a/Jena-2.6.4/lib/icu4j-3.4.4.jar b/graphite/Jena-2.6.4/lib/icu4j-3.4.4.jar
similarity index 100%
rename from Jena-2.6.4/lib/icu4j-3.4.4.jar
rename to graphite/Jena-2.6.4/lib/icu4j-3.4.4.jar
diff --git a/Jena-2.6.4/lib/iri-0.8.jar b/graphite/Jena-2.6.4/lib/iri-0.8.jar
similarity index 100%
rename from Jena-2.6.4/lib/iri-0.8.jar
rename to graphite/Jena-2.6.4/lib/iri-0.8.jar
diff --git a/Jena-2.6.4/lib/jena-2.6.4.jar b/graphite/Jena-2.6.4/lib/jena-2.6.4.jar
similarity index 100%
rename from Jena-2.6.4/lib/jena-2.6.4.jar
rename to graphite/Jena-2.6.4/lib/jena-2.6.4.jar
diff --git a/Jena-2.6.4/lib/log4j-1.2.13.jar b/graphite/Jena-2.6.4/lib/log4j-1.2.13.jar
similarity index 100%
rename from Jena-2.6.4/lib/log4j-1.2.13.jar
rename to graphite/Jena-2.6.4/lib/log4j-1.2.13.jar
diff --git a/Jena-2.6.4/lib/slf4j-api-1.5.8.jar b/graphite/Jena-2.6.4/lib/slf4j-api-1.5.8.jar
similarity index 100%
rename from Jena-2.6.4/lib/slf4j-api-1.5.8.jar
rename to graphite/Jena-2.6.4/lib/slf4j-api-1.5.8.jar
diff --git a/Jena-2.6.4/lib/slf4j-log4j12-1.5.8.jar b/graphite/Jena-2.6.4/lib/slf4j-log4j12-1.5.8.jar
similarity index 100%
rename from Jena-2.6.4/lib/slf4j-log4j12-1.5.8.jar
rename to graphite/Jena-2.6.4/lib/slf4j-log4j12-1.5.8.jar
diff --git a/Jena-2.6.4/lib/stax-api-1.0.1.jar b/graphite/Jena-2.6.4/lib/stax-api-1.0.1.jar
similarity index 100%
rename from Jena-2.6.4/lib/stax-api-1.0.1.jar
rename to graphite/Jena-2.6.4/lib/stax-api-1.0.1.jar
diff --git a/graphite/Jena-2.6.4/lib/wstx-asl-3.2.9.jar b/graphite/Jena-2.6.4/lib/wstx-asl-3.2.9.jar
new file mode 100644
index 0000000..ffdbd1f
Binary files /dev/null and b/graphite/Jena-2.6.4/lib/wstx-asl-3.2.9.jar differ
diff --git a/Jena-2.6.4/lib/xercesImpl-2.7.1.jar b/graphite/Jena-2.6.4/lib/xercesImpl-2.7.1.jar
similarity index 100%
rename from Jena-2.6.4/lib/xercesImpl-2.7.1.jar
rename to graphite/Jena-2.6.4/lib/xercesImpl-2.7.1.jar
diff --git a/Jena-2.6.4/readme.html b/graphite/Jena-2.6.4/readme.html
similarity index 100%
rename from Jena-2.6.4/readme.html
rename to graphite/Jena-2.6.4/readme.html
diff --git a/graphite/LICENSE.txt b/graphite/LICENSE.txt
new file mode 100644
index 0000000..aa21393
--- /dev/null
+++ b/graphite/LICENSE.txt
@@ -0,0 +1,13 @@
+Copyright 2011 Ronan Klyne
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/graphite/__init__.py b/graphite/__init__.py
new file mode 100644
index 0000000..f577b4c
--- /dev/null
+++ b/graphite/__init__.py
@@ -0,0 +1,2 @@
+
+from rdfgraph import Graph, Dataset, Endpoint, Config
diff --git a/config.ini b/graphite/config.ini
similarity index 100%
rename from config.ini
rename to graphite/config.ini
diff --git a/rdfgraph.py b/graphite/rdfgraph.py
similarity index 83%
rename from rdfgraph.py
rename to graphite/rdfgraph.py
index 2a0598b..397c58b 100644
--- a/rdfgraph.py
+++ b/graphite/rdfgraph.py
@@ -2,6 +2,8 @@
Ripped off from Chris Gutteridge's Graphite: http://graphite.ecs.soton.ac.uk/
"""
+from __future__ import print_function
+
# CONFIG! (finally)
class Config(object):
@@ -16,12 +18,16 @@ def __init__(self):
def load(self):
import os
base_dir = os.path.dirname(__file__)
+ work_dir = os.getcwd()
import ConfigParser
cp = ConfigParser.SafeConfigParser(defaults={
'jena_libs': 'jena/libs',
'jvm_lib': None,
})
- cp.read(self.config_files)
+ cp.read(map(
+ lambda name: os.path.join(base_dir, name),
+ self.config_files,
+ ))
libs_cfg = cp.get('config', 'jena_libs')
if libs_cfg:
@@ -46,7 +52,7 @@ def load(self):
cache_dir = cp.get('config', 'cache_dir')
except:
cache_dir = self.cache_dir
- self.cache_dir = os.path.join(base_dir, cache_dir)
+ self.cache_dir = os.path.join(work_dir, cache_dir)
Config = Config()
@@ -88,7 +94,7 @@ def parse_list(self, tpl):
yield item
elif isinstance(item, (str, unicode)):
# Assume it's a URI. Maybe add some literal support later.
- yield self[item]
+ yield self.resource(item)
else:
yield item
@@ -130,7 +136,7 @@ def get_path(self, name):
return self.index[name]
def get(self, name):
if name not in self.index:
- raise KeyError, name
+ raise KeyError(name)
with self.open(self.index[name], 'rb') as f:
return f.read().decode('utf-8')
__getitem__ = get
@@ -219,17 +225,27 @@ def __init__(self, uri=None, namespaces=None, engine=None):
if namespaces:
self.add_ns(namespaces)
+ @classmethod
+ def get_default_engine_class(cls):
+ return getattr(Graph, '_default_graph_class', JenaGraph)
+ @classmethod
+ def use_jena(cls):
+ Graph._default_graph_class = JenaGraph
+ @classmethod
+ def use_rdflib(cls):
+ Graph._default_graph_class = RdflibGraph
+
def create_default_engine(self):
- return JenaGraph()
+ return self.get_default_engine_class()()
@takes_list
def read_uri(self, lst, allow_error=False, _cache=[], **k):
reload = k.get('reload', False)
assert lst, "Load what?"
for datum in lst:
- assert getattr(datum, 'isURIResource', False), "Can't load " +`datum`
+ assert getattr(datum, 'isURIResource', False), "Can't load {0!r}".format(datum)
try:
- self._load_uri(datum.uri, reload=reload, format=k.get('format', None))
+ self._load_uri(datum.uri(), reload=reload, format=k.get('format', None))
except:
if not allow_error:
raise
@@ -247,6 +263,7 @@ def _sniff_format(self, data, type=None):
return TURTLE
elif type in [
'application/rdf+xml',
+ 'text/xml',
]:
return RDFXML
elif type in [
@@ -276,15 +293,18 @@ def _load_uri(self, uri, **k):
if 'format' in k:
k['format'] = self._parse_rdf_format(k['format'])
# Strip the fragment from this URI before caching it.
+ assert isinstance(uri, (str, unicode)), uri
import urlparse
uri_key = ''.join(urlparse.urlparse(uri)[:5])
if not reload and uri_key in self.loaded: return
self.loaded[uri_key] = True
+ # I preferred turtle here, but RDFXML seems more robust with dodgy input data.
+ CACHE_FORMAT = RDFXML
if uri in self.web_cache:
try:
- self.import_uri('file:///'+self.web_cache.get_path(uri), format=TURTLE)
+ self.import_uri('file:///'+self.web_cache.get_path(uri), format=CACHE_FORMAT)
except:
- print "Error getting <"+uri+"> from cache"
+ print("Error getting <"+uri+"> from cache")
raise
else:
import urllib2
@@ -304,15 +324,36 @@ def _load_uri(self, uri, **k):
raise RuntimeError("Got HTML data", uri, data, mime)
data += f.read()
data = data.decode(enc)
- self.web_cache[uri] = data
+ self.engine.load_text(data, format)
+ # Then write the data to the cache.
g = Graph()
- g.import_uri('file:///'+self.web_cache.get_path(uri), format=format)
- data = g.to_string(format=TURTLE)
-# raise RuntimeError(data)
- reloaded = g.engine.load_text(data, format=TURTLE)
- assert reloaded, reloaded
- self.web_cache[uri] = data
+ g._read_formatted_text(data, format)
+ data2 = g.to_string(format=CACHE_FORMAT)
+ # TODO: optimise this out:
+ # Prove that the data loads before writing it to disk.
+ g.engine.load_text(data2, format=CACHE_FORMAT)
+ self.web_cache[uri] = data2
+
+ def file_uri(self, path):
+ import urllib
+ return 'file:'+urllib.pathname2url(path)
+
+ def load_file(self, path, **k):
+ if 'format' not in k:
+ with open(path, 'rb') as f:
+ data = f.read(1024)
+ k['format'] = self._sniff_format(data)
+ else:
+ k['format'] = self._parse_rdf_format(k['format'])
+ uri = self.file_uri(path)
+ self.import_uri(uri, **k)
+
+ def save_file(self, path, format='turtle'):
+ format = self._parse_rdf_format(format)
+ data = self.engine.to_string(format=format)
+ with open(path, 'wb') as f:
+ f.write(data)
def import_uri(self, uri, **k):
"Load data directly from a URI into the Jena model (uncached)"
@@ -348,11 +389,13 @@ def _parse_rdf_format(self, format):
def read_text(self, text, mime=None):
format = self._sniff_format(text, type=mime)
+ return self._read_formatted_text(text, format)
+ def _read_formatted_text(self, text, format):
if format == TURTLE:
self.read_turtle(text)
elif format == N3:
self.read_n3(text)
- elif format == NTRIPLES:
+ elif format == NTRIPLE:
self.read_ntriples(text)
elif format == RDFXML:
self.read_rdfxml(text)
@@ -540,7 +583,7 @@ def _parse_sparql_result(self, result_obj):
def resource(self, uri):
if getattr(uri, 'is_resource', False):
return uri
- return Resource(self, URINode(uri))
+ return Resource(self, self._parse_uri(uri))
get = resource
__getitem__ = resource
def literal(self, thing):
@@ -565,15 +608,15 @@ def add_inference(self, type):
@takes_list
def all_of_type(self, types):
for type in types:
- for x, y, z in self.triples(None, 'rdf:type', self[type]):
+ for x, y, z in self.triples(None, 'rdf:type', type):
yield x
@gives_list
def all_types(self):
seen = {}
for x, y, z in self.triples(None, 'rdf:type', None):
- if z in seen: continue
- seen[z] = True
+ if z.value() in seen: continue
+ seen[z.value()] = True
yield z
@@ -615,6 +658,18 @@ def __iter__(self):
yield x
self.iterable = None
+ def __len__(self):
+ i = 0
+ for _ in self:
+ i += 1
+ return i
+
+ def __repr__(self):
+ return "["+ ", ".join(map(repr, self)) +"]"
+
+ def __str__(self):
+ return "["+ ", ".join(map(str, self)) +"]"
+
class ResourceList(Reiterable):
isResourceList = True
@@ -739,6 +794,22 @@ def value(self):
# Literal
return self.datum.value()
+ def label(self):
+ lbl = self.get(
+ "skos:prefLabel",
+ "rdfs:label",
+ "foaf:name",
+ "dct:title",
+ "dc:title",
+ "sioc:name",
+ )
+ if lbl:
+ return str(lbl)
+ return lbl
+
+ def has_label(self):
+ return bool(self.label())
+
isURIResource = True
def __hash__(self):
@@ -771,10 +842,11 @@ def inverse_property_values(self):
for x, y, z in self.graph.triples(None, None, res._get_raw_datum()):
yield y, x
- def get(self, prop):
+ def get(self, *props):
"Get a property"
- for x in self.all(prop):
- return x
+ for prop in props:
+ for x in self.all(prop):
+ return x
return None
__getitem__ = get
@@ -1078,8 +1150,8 @@ def triples(self, x, y, z):
})
for uri in endpoints:
if Config.sparql_debug:
- print "Auto-query:", uri
- print query
+ print("Auto-query: {0}".format(uri))
+ print(query)
self._triple_query_cache.setdefault(uri, {})[(x, y, z)] = True
self.endpoint(uri).construct(self.data_cache, query)
#
@@ -1106,7 +1178,7 @@ def _sparql(self, query):
def _load_all_sparql(self, query):
for uri in self.select_endpoints(query):
- raise NotImplementedError, "Implement Endpoint class for 'read_sparql'"
+ raise NotImplementedError("Implement Endpoint class for 'read_sparql'")
for x in self.endpoint(uri).select(query):
yield x
@@ -1132,19 +1204,28 @@ class Engine(object):
"""Defines an interface for an RDF triple store and query engine.
"""
def sparql(self, query_text):
- raise NotImplemented, "SPARQL querying not supported by this engine"
+ raise NotImplementedError("SPARQL querying not supported by this engine")
def triples(self, subject, predicate, object):
- raise NotImplemented, "Select triples from the store"
+ raise NotImplementedError("Select triples from the store")
+
+ def load_uri(self, uri, format=TURTLE):
+ raise NotImplementedError("Load RDF from a URI into the store")
- def load_uri(self, uri):
- raise NotImplemented, "Load RDF from a URI into the store"
+ def load_text(self, text, format=TURTLE, encoding='utf-8'):
+ raise NotImplementedError("Load RDF from a string into the store")
+
+ def dump(self, format=TURTLE):
+ return self.to_string(format=format)
+
+ def to_string(self, format=TURTLE):
+ raise NotImplementedError("Dump RDF as a string")
def expand_uri(self, uri):
- raise NotImplementedError, "Expand a URI's shorthand prefix"
+ raise NotImplementedError("Expand a URI's shorthand prefix")
def add_namespace(self, prefix, uri):
- raise NotImplementedError, "Register a namespace and it's prefix"
+ raise NotImplementedError("Register a namespace and it's prefix")
import warnings
warnings.filterwarnings("ignore", message="the sets module is deprecated")
@@ -1214,9 +1295,12 @@ class Node(object):
is_blank = False
is_uri = False
is_literal = False
- def __init__(self, datum):
+ def __init__(self, datum, **k):
self.datum = datum
+ self.init(**k)
assert self.check(), datum
+ def init(self):
+ pass
def __str__(self):
return unicode(self.datum)
@@ -1243,11 +1327,14 @@ def value(self):
def check(self):
uri = self.datum
assert isinstance(uri, (str, unicode)), (uri, type(uri))
+ assert (type(uri) in (str, unicode)), (uri, type(uri))
return True
class Literal(Node):
is_literal = True
def value(self):
return self.datum
+ def init(self, datatype=None):
+ self.datatype = datatype
class Blank(Node):
is_blank = True
def value(self):
@@ -1260,7 +1347,7 @@ def __init__(self, debug=False):
self.debug = debug
else:
def debug(x):
- print x
+ print(x)
self.debug = debug
runJVM()
@@ -1353,7 +1440,7 @@ def _mk_resource(self, res):
JPackage(self._jena_pkg_name).rdf.model.Resource,
)
assert getattr(res, 'is_node', False), (res, type(res))
- assert res.is_uri, res
+# assert res.is_uri, res # XXX: TODO: This breaks with blank nodes, and shouldn't
uri = res.datum
assert isinstance(uri, (unicode, str)), (uri, type(uri))
return JObject(
@@ -1393,6 +1480,8 @@ def _mk_object(self, obj):
return obj.datum
else:
value = obj.value()
+ if isinstance(value, (str, unicode)):
+ value = JString(value)
return JObject(
self.get_model().createTypedLiteral(value),
JPackage(self._jena_pkg_name).rdf.model.RDFNode,
@@ -1430,13 +1519,13 @@ def load_uri(self, uri, format=None, allow_error=False):
else:
self.jena_model = jena
- def load_text(self, text, format=TURTLE):
+ def load_text(self, text, format=TURTLE, encoding='utf-8'):
format = self.get_jena_format(format)
self.debug("JENA load text "+format)
jena = self.get_model()
uri = "tag:string-input"
- if isinstance(text, unicode):
- text = text.encode('utf-8')
+ if not isinstance(text, unicode):
+ text = unicode(text, encoding)
jstr = JString(text)
input = JClass('java.io.StringReader')(jstr)
jena = jena.read(input, uri, format)
@@ -1448,7 +1537,7 @@ def import_sparql(self, endpoint, query):
qexec.execConstruct(self.jena_model)
def has_triple(self, x, y, z):
- self.debug(' '.join(["JENA has_triple ", `x`, `y`, `z`]))
+ self.debug(' '.join(["JENA has_triple ", repr(x), repr(y), repr(z)]))
jena = self.get_model()
sub = self._mk_resource(x)
pred = self._mk_property(y)
@@ -1456,7 +1545,7 @@ def has_triple(self, x, y, z):
return bool(jena.contains(sub, pred, ob))
def set_triple(self, x, y, z):
- self.debug(' '.join(["JENA add_triple ", `x`, `y`, `z`]))
+ self.debug(' '.join(["JENA add_triple ", repr(x), repr(y), repr(z)]))
jena = self.get_model()
sub = self._mk_resource(x)
pred = self._mk_property(y)
@@ -1469,7 +1558,7 @@ def set_triple(self, x, y, z):
jena.add(stmt)
def remove_triples(self, x, y, z):
- self.debug(' '.join(["JENA remove_triples ", `x`, `y`, `z`]))
+ self.debug(' '.join(["JENA remove_triples ", repr(x), repr(y), repr(z)]))
jena = self.get_model()
sub = self._mk_resource(x)
pred = self._mk_property(y)
@@ -1477,7 +1566,7 @@ def remove_triples(self, x, y, z):
jena.removeAll(sub, pred, ob)
def triples(self, x, y, z):
- self.debug(' '.join(["JENA triples ", `x`, `y`, `z`]))
+ self.debug(' '.join(["JENA triples ", repr(x), repr(y), repr(z)]))
jena = self.get_model()
sub = self._mk_resource(x)
pred = self._mk_property(y)
@@ -1539,3 +1628,120 @@ def sparql(self, query_text): # JenaGraph
query = q_pkg.QueryFactory.create(query_text)
qexec = q_pkg.QueryExecutionFactory.create(query, model)
return self._iter_sparql_results(qexec)
+
+
+class RdflibGraph(Engine, Jena):
+ """Defines a mechanism for accessing a triple store in rdflib.
+ """
+ def __init__(self, **k):
+ super(RdflibGraph, self).__init__(**k)
+ import rdflib
+ import rdfextras
+ self.graph = rdflib.Graph()
+
+ def sparql(self, query_text):
+ qres = self.graph.query(query_text)
+ qvars = qres.vars
+ #raise RuntimeError(qres.bindings, query_text)
+ for soln in qres.bindings:
+ d = {}
+ for v in qvars:
+ try:
+ value = soln[v]
+ except KeyError:
+ continue
+ parsed_value = self._convert_rdflib_value(value)
+ d[v.toPython()[1:]] = parsed_value
+ #raise RuntimeError(d, soln)
+ yield d
+ #raise NotImplementedError, "SPARQL querying not supported by this engine"
+
+ def _convert_data_value(self, val):
+ if val is None: return None
+ import rdflib
+ if isinstance(val, URINode):
+ return rdflib.URIRef(val.value())
+ if isinstance(val, Literal):
+ return rdflib.Literal(val.value())
+ raise ValueError(val)
+
+ def _convert_rdflib_value(self, val):
+ if val is None:
+ raise ValueError(val)
+ import rdflib
+ if isinstance(val, rdflib.URIRef):
+ return URINode(val.toPython())
+ if isinstance(val, rdflib.BNode):
+ return Blank(str(val))
+ if isinstance(val, rdflib.Literal):
+ datatype = val.datatype
+ if datatype is not None:
+ datatype = datatype.toPython()
+ return Literal(val.toPython(), datatype=datatype)
+ raise ValueError(val)
+
+ def set_triple(self, subject, predicate, object):
+ self.graph.add((
+ self._convert_data_value(subject),
+ self._convert_data_value(predicate),
+ self._convert_data_value(object),
+ ))
+
+ def remove_triples(self, subject, predicate, object):
+ self.graph.remove((
+ self._convert_data_value(subject),
+ self._convert_data_value(predicate),
+ self._convert_data_value(object),
+ ))
+
+ def _triples(self, subject, predicate, object):
+ for s, p, o in self.graph.triples((
+ self._convert_data_value(subject),
+ self._convert_data_value(predicate),
+ self._convert_data_value(object),
+ )):
+ yield (
+ self._convert_rdflib_value(s),
+ self._convert_rdflib_value(p),
+ self._convert_rdflib_value(o),
+ )
+
+ def triples(self, subject, predicate, object):
+ return list(self._triples(subject, predicate, object))
+
+ def load_uri(self, uri, format=TURTLE):
+ return self.graph.parse(uri, format=self._convert_format_id(format))
+
+ def load_text(self, text, format=TURTLE, encoding='utf8'):
+ #u_text = text.decode(encoding)
+ u_text = text
+ return self.graph.parse(data=u_text, format=self._convert_format_id(format))
+
+ def _convert_format_id(self, format):
+ if format in (TURTLE, N3):
+ return 'n3'
+ if format in (NTRIPLE, ):
+ return 'n3'
+ if format in (RDFXML, ):
+ return 'xml'
+ raise ValueError("Unhandled RDF format descriptor", format)
+
+ def expand_uri(self, uri):
+ if ':' not in uri:
+ return uri
+ prefix, rest = uri.split(':', 1)
+ for p, r in self.graph.namespaces():
+ if prefix == p:
+ return r + rest
+ return uri
+
+ def add_namespace(self, prefix, uri):
+ return self.graph.bind(prefix, uri, True)
+
+ def to_string(self, format=TURTLE):
+ return self.graph.serialize(format=self._convert_format_id(format))
+
+
+# Hook in a more sensible default ;-)
+Graph.use_rdflib()
+
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..3724619
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,24 @@
+
+from distutils.core import setup
+
+setup(
+ name='python-graphite',
+ version='0.2.2',
+ author='Ronan Klyne',
+ author_email='python-graphite@rklyne.net',
+ packages=['graphite'],
+ package_data={
+ 'graphite': [
+ 'config.ini',
+ '*.txt',
+ 'Jena-2.6.4/*.txt',
+ 'Jena-2.6.4/*.html',
+ 'Jena-2.6.4/lib/*.jar',
+ ],
+ },
+ scripts=[],
+ url='http://code.google.com/p/python-graphite/',
+ license='LICENSE.txt',
+ description='A flexible RDF hacking library built on JPype and Jena',
+ long_description=open('README.txt').read(),
+)
diff --git a/test.py b/test/test.py
old mode 100644
new mode 100755
similarity index 62%
rename from test.py
rename to test/test.py
index 378b22b..0505ce7
--- a/test.py
+++ b/test/test.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
"""
These test may seem a bit light. If they don't then they should.
@@ -5,15 +6,23 @@
"""
import unittest
-import rdfgraph
class Test(unittest.TestCase):
verbose = False
+
+ def new_graph(self, g=None):
+ if g is None:
+ g = rdfgraph.Graph()
+ self.g = g
+
def setUp(self):
- self.g = rdfgraph.Graph()
+ self.new_graph()
def tearDown(self):
self.g = None
+ def file_data(self, data):
+ return TempFile(data)
+
SAMPLE_RDFXML = """
@@ -39,6 +48,26 @@ def tearDown(self):
"""
+class TempFile(object):
+ def __init__(self, data):
+ assert isinstance(data, str) # Only permit bytes here.
+ self.data = data
+
+ def __enter__(self):
+ import tempfile
+ tpl = tempfile.mkstemp()
+ fn, self.name = tpl
+ tf = open(self.name, 'wb')
+ tf.write(self.data)
+ tf.close()
+ return self.name
+
+ def __exit__(self, a,b,c):
+ try:
+ import os
+ os.remove(self.name)
+ except: pass
+
class TestRead(Test):
def test_read_XML(self):
self.g.load_rdfxml(SAMPLE_RDFXML)
@@ -105,6 +134,14 @@ def test_set(self, other=None):
self.failUnless(r['tag:p'])
self.assertEquals(r['tag:p'], other)
+ def test_set_char(self):
+ # Check single characters
+ r = self.g.get('tag:dummy1')
+ char = 'A'
+ r['tag:char'] = char
+ self.failUnless(r['tag:char'])
+ self.assertEquals(r['tag:char'], char)
+
def test_set_literal(self):
self.test_set(other=2)
self.test_set(other="Wibble")
@@ -212,9 +249,103 @@ def test_join(self):
lst1
)
+class TestUnicode(Test):
+
+ u_lit = u'\x9c' # What iso-8859-1 calls '\xa3' - the British Pound sign.
+ u_ttl = '''
+ @prefix xsd: .
+ "\xc2\x9c"^^xsd:string .
+ '''
+ _rel = 'tag:new_relation'
+ _res = 'tag:new_resource'
+
+ def assert_loaded(self, g=None):
+ if g is None:
+ g = self.g
+ ts = list(g.triples(None, None, None))
+ self.assertEquals(len(ts), 1)
+ self.assertEquals(self.u_lit, g[self._res][self._rel])
+
+ def assert_not_loaded(self, g=None):
+ if g is None:
+ g = self.g
+ ts = list(g.triples(None, None, None))
+ self.assertEquals(len(ts), 0)
+
+ def test_ttl_load(self):
+ self.g.load_turtle(self.u_ttl)
+ self.assert_loaded()
+
+ def test_ttl_load_file(self, use_cache=False):
+ import os
+ self.assert_not_loaded()
+ with self.file_data(self.u_ttl) as f:
+ self.failUnless(os.path.isfile(f), f)
+ with open(f, 'rb') as fp:
+ self.failUnless(fp, f)
+ if use_cache:
+ uri = self.g.file_uri(f)
+ self.g.load(uri)
+ else:
+ self.g.load_file(f)
+ self.assert_loaded()
+
+ def test_ttl_load_file_with_cache(self):
+ self.test_ttl_load_file(True)
+
+ def test_set_literal(self):
+ r = self.g[self._res]
+ r.set(self._rel, self.u_lit)
+ self.assertEquals(self.u_lit, self.g[self._res][self._rel])
+
+ def test_save_and_load(self):
+ import tempfile
+ fno, name = tempfile.mkstemp()
+ self.g.load_turtle(self.u_ttl)
+ self.assert_loaded()
+ self.g.save_file(name)
+
+ # The test of save is whether we can load it or not.
+ self.new_graph()
+ self.assert_not_loaded()
+ self.g.load_file(name)
+ self.assert_loaded()
+
+
+class TestSparql(Test):
+ def setUp(self):
+ super(TestSparql, self).setUp()
+ self.g.load_ttl("""
+
+ a .
+ """)
+
+ def test_select(self):
+ results = self.g.sparql("select ?s ?p ?o where {?s ?p ?o}")
+ self.failUnless(results)
+ for var, expected in [
+ ('s', 'tag:dummy1'),
+ ('p', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
+ ('o', 'tag:dummy2'),
+ ]:
+ lst = list(results[var])
+ self.assertEquals(len(lst), 1)
+ self.assertEquals(lst[0], expected)
if __name__ == '__main__':
+ # A bit of bootstrap to make sure we test the right stuff
import sys
+ import os
+ mod_path = os.path.join(os.path.dirname(__file__), os.pardir)
+ mod_path = os.path.abspath(mod_path)
+
+ print "Testing in", mod_path
+ sys.path.insert(0, mod_path)
+
+ import graphite.rdfgraph as rdfgraph
+ globals()['rdfgraph'] = rdfgraph
+
+ # Kick off the tests
unittest.main(argv=sys.argv)