diff --git a/Documentation.md b/Documentation.md
new file mode 100644
index 0000000..a9755df
--- /dev/null
+++ b/Documentation.md
@@ -0,0 +1,185 @@
+# **work in progress** #
+
+# What it does #
+
+This library lets you load RDF from a wide range of data sources, aggregate it locally and query the combined data. The query syntax is friendly and concise, to help you explore what the semantic web can do.
+
+
+
+## Illustrative examples ##
+Loading RDF over HTTP:
+
+```
+>>> import rdfgraph
+>>> g = rdfgraph.Graph()
+>>> g.load("http://webscience.org/people")
+<__main__.Graph object at 0x017F3A70>
+>>> g.all_of_type('foaf:Person').sort('foaf:family_name').get('foaf:name').join(", ")
+"Harold (Hal) Abelson, Hans Akkermans, Harith Alani, Tim Berners-Lee, Michael L. Brodie, Leslie Carr, Manuel Castells, Samantha Collins, Noshir Contractor, Richard Cyganiak, Susan Davies, David De Roure, Stefan Decker, Craig Gallen, Hugh Glaser, Jennifer Golbeck, Christopher Gutteridge, Wendy Hall, James Hendler, Lalana Kagal, Joyce Lewis, Helen Margetts, Deborah L. McGuinness, Peter Monge, Sudarshan Murthy, Nichola Need, Kieron O'Hara, Nigel Shadbolt, Steffen Staab, John Taylor, Brian Uzzi, Mark Weal, Daniel Weitzner, Bebo White, Jianping Wu, mc schraefel, Amy van der Hiel"
+>>>
+```
+This illustrates how the `ResourceList` object (returned by `all_of_type`) helps you manipulate sets of data easily.
+
+Loading from SPARQL endpoints:
+```
+>>> graph = Graph()
+>>> graph.add_endpoint("http://linked4.org/lsd/sparql")
+>>> rbwm = 'http://www.rbwm.gov.uk/id/authority/rbwm#id'
+>>> print graph[rbwm]['rdfs:label']
+Royal Borough of Windsor and Maidenhead
+```
+Note that you query in exactly the same way and don't have to write SPARQL. Just don't worry about how the querying happens :-)
+
+# Graph class #
+This class is your gateway into the semantic web. It has a Jena-backed data model to store data locally for processing, and it can also maintain a list of remote data sources from which data is progressively loaded into the local graph as you query.
+
+## Returned lists ##
+Most returned lists are actually `ResourceList`s with lots of extra handy methods.
+
+## Passing parameters ##
+Most methods accept single items, parameter lists, tuples and `ResourceList`s - just try passing in whatever you have and it'll normally work.
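+
+For example, a graph might be loaded from one URI or from several at once - a quick sketch (the URIs here are placeholders, and exactly which forms each method accepts can vary):
+```
+>>> g = rdfgraph.Graph()
+>>> g.load("http://example.org/data.rdf")
+>>> g.load(("http://example.org/data.rdf", "http://example.org/more.rdf"))
+```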
+
+## Getting data in ##
+
+### add\_endpoint(uri) ###
+Register a SPARQL endpoint for future automatic queries.
+
+### import\_uri(uri) ###
+Takes a single URI and loads from it directly into the graph, bypassing the web cache. It's rare that you'll want to do that...
+
+### load(uri, ...) ###
+Takes some URIs and loads RDF from each of them into the graph.
+
+### String data loading functions ###
+You can load RDF data from strings using the following functions (a quick sketch follows the list):
+ * load\_n3
+ * load\_ntriple
+ * load\_rdfxml
+ * load\_turtle
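+
+A minimal sketch of loading Turtle from a string - the data here is made up, and the single-string-argument signature is an assumption based on the function names:
+```
+>>> g = rdfgraph.Graph()
+>>> g.load_turtle("""
+... @prefix foaf: <http://xmlns.com/foaf/0.1/> .
+... <http://example.org/me> foaf:name "Example Person" .
+... """)
+```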
+
+### load\_sparql(endpoint, query) ###
+Execute a SPARQL CONSTRUCT query against the given endpoint and incorporate the resulting triples into this graph.
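+
+For example - a sketch, with an illustrative endpoint and query:
+```
+>>> g = rdfgraph.Graph()
+>>> g.load_sparql(
+...     "http://linked4.org/lsd/sparql",
+...     "CONSTRUCT { ?x a ?y } WHERE { ?x a ?y } LIMIT 500")
+>>> print g.to_string()
+```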
+
+
+### read\_sparql, sparql(query) ###
+Execute a SPARQL SELECT and return a `SparqlList` iterator over the results.
+
+This does NOT import triples into the local graph, as no triples are returned by a SPARQL SELECT.
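+
+A short sketch of running a SELECT and walking the results (the endpoint and query are illustrative, and the dictionary-style access to each result row is an assumption based on the other examples in this project):
+```
+>>> g = rdfgraph.Graph()
+>>> g.add_endpoint("http://linked4.org/lsd/sparql")
+>>> for row in g.sparql("SELECT DISTINCT ?x WHERE { ?x ?y ?z } LIMIT 10"):
+...     print row['x']
+```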
+
+
+## Query ##
+
+### resource, get(uri) ###
+Given a URI, returns a Resource (`URIResource`) object that has a world of handy convenience methods for traversing, querying and updating your graph.
+
+### has\_triple(subject, predicate, object) ###
+Returns True if the given triple is present in the graph.
+
+### triples(subject, predicate, object) ###
+The main workhorse method. Returns an iterator over the triples that match the given pattern, where `None` represents a wildcard.
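+
+For example, given a graph `g` loaded as above, you could walk everything that has an `rdf:type` (a sketch; the exact shape of the yielded triples is an assumption):
+```
+>>> for triple in g.triples(None, 'rdf:type', None):
+...     print triple
+```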
+
+### all\_of\_type(type) ###
+A handy method for selecting resources based on their `rdf:type` property.
+
+### all\_types() ###
+Returns a list of all distinct `rdf:type`s.
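+
+For example, to get a quick overview of what kinds of thing a data source describes (the endpoint is illustrative):
+```
+>>> g = rdfgraph.Graph()
+>>> g.add_endpoint("http://linked4.org/lsd/sparql")
+>>> print g.all_types().get('rdfs:label').join(', ')
+```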
+
+## Other ##
+### dump() ###
+Render as HTML.
+
+### expand\_uri(uri) ###
+Convert a short form URI into a full one.
+
+### add\_namespaces, add\_ns(prefix, uri) ###
+Add a prefix and URI to the graph's record of shortform URI prefixes.
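+
+A small sketch of registering a prefix and then using it, given a graph `g`. The Dublin Core prefix and URI are standard, but the return values shown are what you would expect rather than verified output:
+```
+>>> g.add_ns('dc', 'http://purl.org/dc/elements/1.1/')
+>>> g.expand_uri('dc:title')
+'http://purl.org/dc/elements/1.1/title'
+>>> g.shrink_uri('http://purl.org/dc/elements/1.1/title')
+'dc:title'
+```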
+
+### prefixes/namespaces() ###
+Returns a dictionary of all URI namespace prefixes.
+
+### set\_triple(subject, predicate, object) ###
+Usually this would be done through Resource objects (see `get(uri)`) but if you need it, then it's here.
+
+### shrink\_uri(uri) ###
+Convert a full URI into a prefixed one, if possible.
+
+### to\_string(format='turtle') ###
+Return an RDF rendering of this graph.
+
+
+
+# Resource functions #
+## Properties ##
+### all(property) ###
+Iterate over all the values of the given property.
+
+### get(property) ###
+Return the first value of this property (chosen arbitrarily if there are several). Useful when you know there's only one value.
+
+### has(property) ###
+Test if the given property exists.
+
+### set(property, value) ###
+Update the graph, adding a triple with this resource as the subject and the given property and value as the predicate and object. This is also available through item assignment: `resource[property] = value`
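+
+For example, given a graph `g` (the resource URI is illustrative):
+```
+>>> person = g.get('person:1')
+>>> person.set('foaf:nick', 'Binky')
+>>> person['foaf:nick'] = 'Binky'
+```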
+
+### properties() ###
+Iterate over the distinct properties of this resource.
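+
+Continuing the sketch above, you could dump everything known about a resource by combining `properties()` with `get()`:
+```
+>>> for prop in person.properties():
+...     print prop, person.get(prop)
+```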
+
+### property\_values() ###
+Iterate over the properties of this resource and their values.
+
+### inverse\_properties() ###
+Iterate over all distinct inverse properties (where this resource is the object).
+
+### inverse\_property\_values() ###
+Iterate over all inverse properties and their values.
+
+## Other ##
+### dump() ###
+Render as HTML.
+
+### load() ###
+Load RDF data from this resource's URI.
+
+### load\_same\_as() ###
+Look up the URIs of everything that is 'owl:sameAs' this resource and load RDF from them.
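+
+For example (the URI is one used in the project's examples):
+```
+>>> g = rdfgraph.Graph()
+>>> g.load("http://webscience.org/person/2.n3")
+>>> person = g["http://webscience.org/person/2.n3"]
+>>> person.load_same_as()
+>>> print person.to_string()
+```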
+
+### short\_html() ###
+Return a short HTML description of this resource, not including any arbitrary properties.
+
+### shrink\_uri() ###
+Return the short form of this resource's URI.
+
+### to\_string() ###
+Export this resource as RDF.
+
+### type() ###
+Look up the 'rdf:type' of this resource.
+
+
+
+
+# ResourceList functions #
+
+### get(property) ###
+For each resource, get the value of this property, returning the results as a further list.
+
+### sort(property) ###
+Sort the list by this property. This involves loading the whole list into memory, so beware with large lists.
+
+### all(property) ###
+For each resource, get all the values of this property, concatenating those lists.
+
+### join(separator) ###
+Call to\_string() on each resource then join them with this separator.
+
+### union(other) ###
+Returns the union of this list with another.
+
+### intersection(other) ###
+Returns the intersection of this list and another.
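+
+A quick sketch of the set operations (the types are illustrative):
+```
+>>> people = g.all_of_type('foaf:Person')
+>>> agents = g.all_of_type('foaf:Agent')
+>>> both = people.intersection(agents)
+>>> either = people.union(agents)
+```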
\ No newline at end of file
diff --git a/EarlyApiRedesign.md b/EarlyApiRedesign.md
new file mode 100644
index 0000000..09019cc
--- /dev/null
+++ b/EarlyApiRedesign.md
@@ -0,0 +1,110 @@
+# Classes #
+
+These are the major concepts dealt with.
+
+## Graph ##
+
+An independent object that represents a local RDF graph maintained in Jena.
+
+(Some of these method renames have not been implemented yet.)
+
+Methods (a rough usage sketch follows this list):
+ * Input
+ * `read_text` - guess content type
+ * `read_turtle`
+ * `read_n3`
+ * `read_ntriples`
+ * `read_rdfxml`
+ * `read_uri` - load data from URI
+   * `read_file` - read from a file
+ * Output
+ * `save_text` - content type param
+ * `save_turtle`
+ * `save_n3`
+ * `save_ntriples`
+ * `save_rdfxml`
+ * `save_file` - save to a file
+ * Query
+ * ... all the signature bits but no sparql
+ * `resource`/`get`/`graph[uri]` - get a URI Resource
+ * `literal` - get a Literal Resource
+   * `sparql(query)` - Run a SPARQL SELECT over this graph.
+ * `triples(x, y, z)` - Select statements from the graph.
+ * Update
+ * `add_triple(x, y, z)`
+ * `remove_triple(x, y, z)`
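+
+A rough sketch of how this proposed naming might look in use (bearing in mind that some of these renames are not implemented yet; the URI and data are illustrative, and the return conventions are assumptions):
+```
+>>> g = Graph()
+>>> g.read_uri("http://example.org/data.rdf")
+>>> g.add_triple("http://example.org/me", "foaf:nick", "Binky")
+>>> print g.save_turtle()
+```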
+
+## Endpoint ##
+Represents a standalone endpoint and handles caching, querying, etc.
+
+**More work is required here. The class is still largely bare.**
+
+ * select
+ * construct
+ * describe
+ * ask
+
+ * Pending structure changes:
+ * This class should:
+ * handle automatically fetching triples when requested.
+ * be responsible for maintaining a disk based graph that caches those triples.
+ * be responsible for maintaining efficiency of access to this endpoint.
+ * (all of these responsibilities currently rest with a Dataset)
+
+
+## Dataset ##
+Represents a number of data sources, both sparql endpoints and local graphs.
+
+Method groups:
+ * Endpoints:
+ * add/remove/list
+ * Graphs:
+ * Add/remove/list
+ * Query - provide a combined query system returning iterator based data in suitable wrapper classes
+ * Sparql
+ * Triple query
+ * Native python query
+
+
+## Resource ##
+Represents a Node (literal, uri or blank) and provides handy query methods. _This is the main workhorse_.
+
+Methods:
+ * Data
+ * Get literal as native datatype
+ * get URI
+ * `__nonzero__` method for blank nodes (`if node: node['some:thing']`)
+ * Traversal
+ * `get(property)` - get the Resource linked to by this property.
+ * `all(property)` - get all Resources linked to by this property.
+ * `has(property)` - check if the Resource has this property.
+ * Update:
+ * `node['foaf:nick'] = 'Binky'` or `node.set('foaf:nick', 'Binky')` to replace an existing relation.
+   * `node.add('foaf:nick', 'Binky')` to add a new relation.
+ * Interrogate - list properties, etc.
+ * `properties()` - Iterate over all properties of this resource.
+ * `property_values()` - Iterate over all properties of this resource and their related values.
+ * Utility
+ * `shrink_uri()`/`expand_uri()`
+ * `is_uri()`/`is_literal()`/`is_blank()`
+ * `uri()` - return the URI of this resource.
+ * `value()` - return the literal value of this resource.
+ * `type()` - return the 'rdf:type' of this resource.
+ * `load_same_as()` - load all resources that are 'owl:sameAs' this one.
+   * `load()` - Load RDF data from this Resource's URI.
+   * `get_ns()` - return the namespace URI of this Resource
+ * `in_ns(ns)` - check if the resource is in this namespace.
+
+## ResourceList ##
+Represents an iterator based list of resources.
+
+Method groups:
+ * Set functions - combine/intersect `ResourceList`s
+ * Query - Handy Resource functions mapped across all list items
+
+## Internal ##
+ * Node - raw engine (Jena) output in Python datatypes.
+ * Resource - A URI node.
+ * Blank - just hold the Jena ID object
+ * Literal - parsed and tagged
+ * Heavy testing on the unicode I/O to Jena through JPype.
\ No newline at end of file
diff --git a/Installation.md b/Installation.md
new file mode 100644
index 0000000..73424ac
--- /dev/null
+++ b/Installation.md
@@ -0,0 +1,32 @@
+# Prerequisites #
+
+This library depends on Python 2.6 or greater and [JPype](http://sourceforge.net/projects/jpype/).
+
+# Installing JPype #
+
+## Windows ##
+ * [Download the installer](http://sourceforge.net/projects/jpype/files/JPype/0.5.4/) that matches your Python version.
+ * Run it and follow the instructions.
+
+## Linux/Mac ##
+ * [Download the ZIP](http://sourceforge.net/projects/jpype/files/JPype/0.5.4/) package of JPype.
+ * Extract the contents somewhere and then open a command prompt there.
+ * As root, run `python setup.py install`.
+
+# Installing python-graphite #
+
+ * First download the latest ZIP package in the [downloads area](http://code.google.com/p/python-graphite/downloads/list).
+ * Extract the contents somewhere and then open a command prompt there.
+ * As root/administrator, run `python setup.py install`.
+
+
+## Testing the installation ##
+Run this:
+```
+~# python
+>>> import graphite
+>>> graph = graphite.Graph()
+>>>
+```
+
+If you don't get a traceback, it worked! If it fails, do let me know what the error message is and I'll do my best to help.
\ No newline at end of file
diff --git a/LICENSE.txt b/LICENSE.txt
deleted file mode 100644
index aa21393..0000000
--- a/LICENSE.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Copyright 2011 Ronan Klyne
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index c62a927..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,4 +0,0 @@
-include config.ini
-include LICENSE.txt
-recursive-include graphite\Jena-2.6.4 *.txt *.jar
-recursive-include graphite *.txt *.ini
diff --git a/ProjectHome.md b/ProjectHome.md
new file mode 100644
index 0000000..ee93d65
--- /dev/null
+++ b/ProjectHome.md
@@ -0,0 +1,97 @@
+## Overview ##
+A Python spin-off of Chris Gutteridge's Graphite library - http://graphite.ecs.soton.ac.uk/
+
+The intent is to make it easy to gather exactly the data you want from wherever it happens to be, and to make it as easy as possible to interrogate. To this end there is a single class that contacts SPARQL endpoints, holds any other data you have, and lets you query it all using rich wrapper classes - `graph.all_of_type('foaf:Person').sort('foaf:family_name').get('foaf:name')`
+
+Most features are in place and working, but the project is still new, so it may not always be as efficient as it could be. More updates to follow soon.
+
+In the long run it will be able to tap into Jena's powerful inference and query capabilities to provide a tailored, flexible tool for exploring what's possible with the semantic web.
+
+### Contact ###
+If you find a bug or it just fails, please do let me know and I'll fix it.
+
+Comments gratefully received at **python-graphite@rklyne.net**
+
+I'm also on twitter as [@ronanklyne](https://twitter.com/ronanklyne)
+
+## Getting started ##
+### Installation ###
+There is an easy [installation guide on the wiki](http://code.google.com/p/python-graphite/wiki/Installation).
+
+### And off you go... ###
+
+Once you're running you can do things like this:
+```
+>>> g = Graph()
+>>> g.load("http://webscience.org/people")
+<__main__.Graph object at 0x017F3A70>
+>>> g.all_of_type('foaf:Person').sort('foaf:family_name').get('foaf:name').join(", ")
+"Harold (Hal) Abelson, Hans Akkermans, Harith Alani, Tim Berners-Lee, Michael L. Brodie, Leslie Carr, Manuel Castells, Samantha Collins, Noshir Contractor, Richard Cyganiak, Susan Davies, David De Roure, Stefan Decker, Craig Gallen, Hugh Glaser, Jennifer Golbeck, Christopher Gutteridge, Wendy Hall, James Hendler, Lalana Kagal, Joyce Lewis, Helen Margetts, Deborah L. McGuinness, Peter Monge, Sudarshan Murthy, Nichola Need, Kieron O'Hara, Nigel Shadbolt, Steffen Staab, John Taylor, Brian Uzzi, Mark Weal, Daniel Weitzner, Bebo White, Jianping Wu, mc schraefel, Amy van der Hiel"
+>>>
+```
+
+**You can do this with SPARQL too!**
+```
+>>> graph = Graph()
+>>> graph.add_endpoint("http://linked4.org/lsd/sparql")
+>>> rbwm = 'http://www.rbwm.gov.uk/id/authority/rbwm#id'
+>>> print graph[rbwm]['rdfs:label']
+Royal Borough of Windsor and Maidenhead
+```
+
+Easy, huh?
+
+There is a wiki page with [full documentation](http://code.google.com/p/python-graphite/wiki/Documentation) but I'd recommend just playing around for a bit.
+
+(Hopefully it will just start up and work, but if not there's a section of JPype/Java path related config in `config.ini` that might need tinkering with)
+
+## Multiple data sources ##
+
+All of this works just the same when you add multiple data sources!
+
+Linked data is all about the linking. Interrogating multiple disparate sources at once is one of the main reasons I put this tool together. Try connecting up data from everyone who will provide it and see what you get :-)
+
+This feature is still under development (the project is six days old) and won't be very clever or fast about using 10 or more SPARQL endpoints together, but it should work.
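+
+For example - a sketch using the endpoints that appear elsewhere in these docs (the output shown is the label from the single-endpoint example above):
+```
+>>> g = Graph()
+>>> g.add_endpoint("http://linked4.org/lsd/sparql")
+>>> g.add_endpoint("http://dbpedia.org/sparql")
+>>> rbwm = 'http://www.rbwm.gov.uk/id/authority/rbwm#id'
+>>> print g[rbwm]['rdfs:label']
+Royal Borough of Windsor and Maidenhead
+```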
+
+## Features ##
+
+Done and working:
+ * Jena backed data model in Python
+ * Handy pythonic query syntax (see [run.py](http://code.google.com/p/python-graphite/source/browse/examples/run.py) for 'examples')
+ * Add new triples: `graph.get('person:1').set('foaf:nick', 'Binky')  # Add a nickname`
+ * Run SPARQL queries over data in memory
+ * Run SPARQL selects against remote endpoints.
+ * Import into local graphs with SPARQL CONSTRUCT statements.
+ * Config niceties done.
+ * HTML output
+ * ResourceList set functions
+ * RDF output in something other than Turtle (maybe)
+ * Automatically import data from SPARQL endpoints as you query. (It's primitive but it works!)
+ * Read in from:
+ * HTTP
+ * String
+ * TTL
+ * N3
+ * RDF/XML
+ * File URI
+
+## Futures ##
+Some things that need doing:
+ * More and better documentation.
+ * Read in from RDFa
+ * Delay SPARQL queries in some kind of continuation object.
+ * This would make using SPARQL endpoints much more efficient.
+ * Optimise
+
+Some ideas of where to go next:
+ * 'Live graphs' - Try to remove the dependency on big local datastores and increase our facility for bringing data in and forgetting it when we're done, as we do with the web today.
+ * 'Magic SPARQLs' - given a list of endpoints, work out what they can each do and query the appropriate endpoints without being explicitly asked.
+
+## Dependencies ##
+**Requires [Python 2.6+](http://python.org/) and [JPype](http://sourceforge.net/projects/jpype/)**. You should go get these.
+
+[Jena](http://jena.sourceforge.net/index.html) 2.6.4 has been included, but you can use your own copy quite easily - see `config.ini`.
+
+JPype and Jena are both wonderful libraries without which I could not have built this tool.
+
+I've tested this successfully on Linux and Windows 7 with Sun JVMs, but other systems ought to work.
diff --git a/README.txt b/README.txt
deleted file mode 100644
index e2dd253..0000000
--- a/README.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-
-
-Overview
-========
-
-Information on this project is currently being maintained at https://github.com/rklyne/python-graphite
-
-All comments gratefully recieved at python-graphite@rklyne.net
-
-
-Dependencies
-============
-This package depends on Python >=2.6 and JPype - http://sourceforge.net/projects/jpype/.
-
-Installation command:
-====================
-
-python setup.py install
-
diff --git a/examples/_cgi.py b/examples/_cgi.py
deleted file mode 100755
index 4bb97ad..0000000
--- a/examples/_cgi.py
+++ /dev/null
@@ -1,27 +0,0 @@
-
-import cgi
-import cgitb
-import os
-error_dir = os.path.join(os.path.dirname(__file__), 'errors')
-cgitb.enable(display=1, logdir=error_dir)
-
-class ErrorContext(object):
- def __enter__(self):
- import sys
- self._se = sys.stderr
- import StringIO
- self.io = StringIO.StringIO()
- sys.stderr = self.io
-
- def __exit__(self, ex, ex1, ex2):
- import sys
- sys.stderr = self._se
- if ex and not isinstance(ex, SystemExit):
- data = self.io.getvalue()
- try:
- ex.stderr = data
- except: pass
-
-with ErrorContext():
- import explorer
- explorer.main()
diff --git a/examples/explorer.py b/examples/explorer.py
deleted file mode 100755
index 4427394..0000000
--- a/examples/explorer.py
+++ /dev/null
@@ -1,328 +0,0 @@
-
-import cgi
-import cgitb
-cgitb.enable()
-
-fs = cgi.FieldStorage()
-
-class Response(object):
- def __call__(self, data, type="text/html"):
- data = str(data)
- import sys
- w = sys.stdout.write
- def wl(text):
- w(text+"\n")
- wl("Status: 200 OK")
- wl("Content-Type: " + type)
- wl('')
- w(data)
- sys.stdout.flush()
- import time
- time.sleep(1)
- sys.exit()
-
-import rdfgraph
-
-def main():
- respond = Response()
- import os
-
- path = os.environ.get('PATH_INFO', '')
- if path.endswith('/schema'):
- return schema_explorer(respond)
-
- if 'type' in fs:
- show_data(respond)
- else:
- landing_page(respond)
-
-HTML = """
-
-
-Semantic web explorer
-
-
-
-
-
-%s
-
-
-"""
-
-def landing_page(respond):
- respond(HTML % (
- """
RDF data explorer
-
This aims to provide the most useful summary possible of any RDF data source.
-
- """
- ))
-
-
-def chart(data, caption="", threshold=0.95):
- "Renders an HTML chart. Data maps name to value."
- lst = [(c, p) for (p, c) in data.items()]
- lst.sort()
- lst.reverse()
- total = sum([c for (c, p) in lst])
- if total == 0:
- return caption+" No data!"
- target = total * threshold
- total = 0
- for i, (c, p) in enumerate(lst):
- total += c
- if total > target:
- break
- i += 1
- i = min(i, 8)
- if i != len(lst):
- extras = lst[i:]
- extras_total = sum([c for (c, p) in extras])
- lst = lst[:i]
- lst.append((extras_total, "Others"))
-
-
- return '\n
'+caption+'
' \
- + '
Count
' \
- + '
\n
'.join(['
%s
%s
' % (p, c) for (c, p) in lst]) \
- + '
'
-
-
-def show_data(respond):
- g = rdfgraph.Graph()
- data_type = fs['type'].value
- url = fs['url'].value
-
- if url is None:
- raise RuntimeError
- if data_type == 'sparql':
- g.add_endpoint(url)
- elif data_type == 'http':
- format = None
- if url.endswith('.ttl'):
- format = 'ttl'
- g.load(url, format=format)
- else:
- return landing_page(respond)
-
- def quote(thing):
- return cgi.escape(unicode(thing))
-
- result = ''
- g.describe("<%s>" % (url,))
-
- resource_count = int(g.sparql(" SELECT ( COUNT ( DISTINCT ?x ) AS ?c ) WHERE { ?x ?y ?z } ").count('c'))
- property_count = int(g.sparql("select (count(distinct ?y) as ?c) where {?x ?y ?z}").count('c'))
- object_count = int(g.sparql("select (count(distinct ?z) as ?c) where {?x ?y ?z}").count('c'))
- triple_count = int(g.sparql("select (count(?z) as ?c) where {?x ?y ?z}").count('c'))
- type_count = int(g.sparql("select (count(distinct ?z) as ?c) where {?x a ?z}").count('c'))
- typed_resource_count = int(g.sparql("select (count(distinct ?x) as ?c) where {?x a ?z}").count('c'))
-
- actions = [
- (0, 'triples'),
- (type_count, 'type'),
- (property_count, 'property'),
- (object_count, 'object'),
- (resource_count, 'resource'),
- ]
- actions.sort()
-
- for weight, action in actions:
- if weight > 150:
- result += "
Too many %ss to summarise
"%action
- continue
- else:
- result += "
%s
"%action
-
- if action == 'triples':
- result += '
' + chart({
- 'Untyped resources': resource_count-typed_resource_count,
- 'Typed resources': typed_resource_count,
- 'Properties': property_count,
- 'Objects': object_count,
- }, caption="Unique URI counts", threshold=2)
-
- explore_typed = False
- if resource_count:
- if typed_resource_count/resource_count < 0.1:
- result += "Less than 10% of resources are typed. Maybe start looking there? "
- explore_typed = True
-
- prop_to_res = resource_count/property_count
- if prop_to_res < 2:
- result += "There are nearly as many properties as resources. This is a web. "
- if prop_to_res > 5:
- result += "There are several properties on each resource. This is concentrated information. "
-
- result += '''
-
'''
-
- elif action == 'property':
- if True:
- props = dict([
- (g.shrink_uri(d.get('y', '')), d['c'])
- for d in
- g.sparql("select ?y (count(?x) as ?c) where {?x ?y ?z} group by ?y order by desc(?c) limit 10")
- if 'y' in d
- ])
- else:
- ps = g.sparql("select distinct ?y where {?x ?y ?z} limit 50")['y']
- props = {}
- for p in ps:
- resultlist = g.sparql("select (count(?x) as ?c) where {?x <%s> ?z}" % (p,))
- c = resultlist.count('c')
- props[p.shrink_uri()] = c
- if props:
- result += chart(props, caption="Property frequencies")
-
- elif action == 'resource':
-
- rs = g.sparql("select distinct ?x where {?x ?y ?z} limit 150")['x']
- result += "
'
-
-
- elif action == 'type':
-
-###
- # for d in g.sparql("select ?z (count(distinct ?x) as ?c) where {?x a ?z} group by ?z order by desc(?c) limit 10"):
- # raise RuntimeError(d)
-###
-
- if True:
- types = dict([
- (d['z'], d['c'])
- for d in
- g.sparql("select ?z (count(distinct ?x) as ?c) where {?x a ?z} group by ?z order by desc(?c) limit 10")
- if 'z' in d
- ])
- else:
- ts = g.sparql("select distinct ?y where {?x a ?y} limit 50")['y']
- types = {}
- for t in ts:
- resultlist = g.sparql("select (count(distinct ?x) as ?c) where {?x a <%s>}" % (t,))
- c = resultlist.count('c')
- types[t.short_html()] = int(c)
- if types:
- result += "
Types (%s total)
\n" %type_count
- result += chart(types, caption="Type frequencies")
-
- elif action == 'object':
- result += "
- Built using python-graphite and JavaScript InfoViz Toolkit
- """)
- else:
- respond(schema_json(), type="application/json")
-
-
-def schema_json():
- g = rdfgraph.Graph()
- g.add_inference('schema')
- url = fs['url'].value
- prop = None
- if 'property' in fs:
- prop = fs['property'].value
-
- format = 'ttl'
- if url.endswith('.ttl'):
- format = 'ttl'
- g.load(url, format=format)
- r = g[url]
-
- if prop:
- # defunct
- data = {
- 'subject': url,
- 'property': prop,
- 'values': [{
- 'id': n.expand_uri(),
- 'name': n.shrink_uri(),
- } for n in g[url].all(prop)],
- }
-
- ns = r.get_ns()
-
- import json
-
- properties = []
- data = {
- 'name': r['rdfs:label'],
- 'properties': properties,
- }
- domain_of = list(r.all('-rdfs:domain'))
- for p in domain_of:
- for r in p.all('rdfs:range'):
- properties.append({
- 'id': str(r),
- 'name': "(" + p.shrink_uri() + ")" \
- + r.shrink_uri() if hasattr(r, 'shrink_uri') else r,
- })
- return json.dumps(data)
diff --git a/examples/run.py b/examples/run.py
deleted file mode 100755
index 0ef929d..0000000
--- a/examples/run.py
+++ /dev/null
@@ -1,113 +0,0 @@
-#!/usr/bin/env python
-
-import graphite
-
-def main():
- e = graphite.Graph()
- uri = "http://webscience.org/person/2.n3"
-# uri = 'http://id.ecs.soton.ac.uk/person/1650'
- e.load(uri)
- person = e[uri]
- ext = False
- print "Dump of person - ", person
- print person.to_string(extended=ext)
- print person.get('foaf:name')
- person.load_same_as()
-# print list(person.property_values())
-# people = list(e.all_of_type('foaf:Person'))
-# print people
- print person.to_string(extended=ext)
-
-
- print "People"
- uri = "http://webscience.org/people.n3"
- g = graphite.Graph().load(uri)
- names = []
- for person in g.all_of_type('foaf:Person').sort('foaf:family_name'):
- print "-"*40
- print person.to_string()
- names.append(person['foaf:name'])
-
- print ', '.join(map(str, names))
-
- print graphite.Graph(). \
- load("http://webscience.org/people"). \
- sparql("PREFIX foaf: SELECT * WHERE { ?person a foaf:Person } LIMIT 5") \
- ['person']['foaf:name'].join(', ') \
-
-def main2():
-
- imdb = 'http://data.linkedmdb.org/sparql'
- dbpedia = 'http://dbpedia.org/sparql'
-
- #
- # Try playing with some Linked4 local govt. data
- # ( http://linked4.org/lsd/ )
- #
- graph = graphite.Graph()
- graph.load_sparql(
- "http://linked4.org/lsd/sparql",
- """
- CONSTRUCT {?x a ?y} WHERE { ?x a ?y } LIMIT 500"""
-
- )
-
- print graph.to_string()
-
-
-def main3():
-
- # Let's try some live exploration of a sparql endpoint - no query building!
-
- dbpedia = 'http://dbpedia.org/sparql'
-
- #
- # Try playing with some Linked4 local govt. data
- # ( http://linked4.org/lsd/ )
- #
- data = graphite.Dataset()
- data.add_endpoint("http://linked4.org/lsd/sparql")
- data.add_endpoint(dbpedia)
- # Royal Borough of Windsor and Maidenhead
- rbwm = 'http://www.rbwm.gov.uk/id/authority/rbwm#id'
- rb = data[rbwm].load_same_as()
- print rb['rdfs:label']
-
- print data.to_string()
-
- print rb.to_string()
- rb.load_same_as()
-
- # This will be cached :-D
- print data[rbwm]['rdfs:label']
-
-
-def main4():
-
- graph = graphite.Dataset()
- graph.add_endpoint("http://services.data.gov.uk/reference/sparql")
-# graph.add_endpoint("http://linked4.org/lsd/sparql")
-
- types = [
- (d.get('z', None), d['c'].value())
- for d in
- graph.sparql("select ?z (count(distinct ?x) as ?c) where {?x a ?z} group by ?z order by desc(?c) limit 10")
- ]
-
- for t in types:
- if not t[0]: continue
- print t[0].uri(), ":\t", t[1]
-# print str(t), repr(t)
-
-
-def explore_types():
- graph = graphite.Graph()
- graph.add_endpoint("http://linked4.org/lsd/sparql")
- print graph.all_types().get('rdfs:label').join(', ')
-
-
-if __name__ == '__main__':
- main4()
-
-
-
diff --git a/graphite/Jena-2.6.4/README.txt b/graphite/Jena-2.6.4/README.txt
deleted file mode 100644
index 15da756..0000000
--- a/graphite/Jena-2.6.4/README.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Jena2 README
-============
-
-Welcome to Jena, a Java framework for writing Semantic Web
-applications.
-
-The full readme is in doc/readme.html.
-
-Documentation is in the doc/ and can be found on the web at
-http://jena.sourceforge.net/
-
-There is a mailing lists for questions and feedback:
-jena-dev@groups.yahoo.com
diff --git a/graphite/Jena-2.6.4/ReleaseNotes.txt b/graphite/Jena-2.6.4/ReleaseNotes.txt
deleted file mode 100644
index 1a87c23..0000000
--- a/graphite/Jena-2.6.4/ReleaseNotes.txt
+++ /dev/null
@@ -1,1471 +0,0 @@
-Release Notes
-=============
-
-==== Jena 2.6.3
-
-GraphBase:
- o Added limits to the number of triples thatr GraphBase.toString() will
- serialise.
-
-Model/Resource API:
- o Took advantage of covariant return types to allow Resource.inModel() and
- Property.inModel() to have Resource resp. Property return types, allowing
- casts of calls to inModel() to (mostly) be eliminated.
-
- o As per request on mailing list, ModelGraphInterface now has a method
- `Resource wrapAsResource(Node)` [name changed to fit better] which
- wraps a node as a Resource and throws if the node is a Literal.
-
- o As per request on mailing list, added Resource method
- x.getPropertyResourceValue( Property p )
- which answers some R such that x p R if any exists, otherwise null
-
- o As per request on mailing list, there's now an OWL2 vocabulary class
- with constants for the terms of OWL2. NOTE: there is no OWL2 ontology
- support present or implied by this vocabulary class.
-
- o getModel() has been pulled up from Resource to RDFNode. This means that
- Literal now supports getModel() and inModel().
-
- o RDFNode has .asLiteral() and .asResource() methods, which deliver
- the Literal or Resource respectively if the RDF Node has that type,
- and otherwise throw a *RequiredException.
-
-Misc:
- o Patched AnonId using UID when running in environments like GAE
- o Fixed problem with comparison on XSDDurations with fractional seconds
-
-Reasoners:
- o added makeSkolem builtin
- o fixed deadlock problem with hybrid rule systems
- o changed interpretation of Functors so they are neither literals nor objects,
- this allows validation of nested reasoners to cope with the introduction of
- functors representing inferred individuals
- o improved RETE initialization so that fewer triples are injected
- into the rule network in cases such as the RDFS no-resource rules
-
-Ontology API:
- o Fixed NPE bug when an ObjectProperty was not the inverseOf another property
-
-Schemagen:
- o Internal refactoring, which should not affect the external behaviour but should
- make it easier to extend the schemagen class
- o Individuals in an included namespace now appear in the output, even if their
- rdf:type is not included.
-
-
-==== Jena 2.6.2
-
-+ Resource.addLiteral() now has an overload that takes a Literal object
- and uses it as the statement object unaltered. (Previously such a Literal
- would be treated as an Object and converted to a String literal, oops).
- Similarly model.addLiteral() has a Literal overloading.
-
-+ N3 pretty writer now write Turtle (no use of "=" for owl:sameAs)
-
-+ Maven-style codebase layout.
-
-+ Update to iri.jar (version 0.7).
-
-+ Change the way URIs are split into namespace and
- local part so as to not split %-encoded sequences.
-
-Reasoners:
- o Fixed problems with pure forward rules generating functor values (fixed
- looping behaviour, fixed selective filtering of functors).
- o Generalized rule reasoner to permit triples in subject position at
- Graph level. Returned InfGraph and deductions graphs are safe but
- getRawGraph on deductions graph will return generalized graph.
- o Fixed handling of unbound groups in regex.
-
-Core API:
- o Fixed bug in comparison of dateTimes with fractional seconds and with
- round-tripping from calendars
-
-ARQ: removed from Jena development
- The following jars have been removed from the lib/ directory - they are only used by ARQ:
- arq.jar, arq-extra.jar, lucene-core-2.3.1.jar, json.jar, stax-api-1.0.jar, wstx-asl-3.0.0.jar
-
-(Jena 2.6.1 is an internal tag, not a release)
-
-==== Jena 2.6.0
-
-** Codebase converted to Java 5
-
- o Convert to use SLF4J for logging facade
-
- o Exceptions AlreadyExistsException, DoesNotExistException and
- RulesetNotFoundException no longer inherit via RDFRDBException
-
-Jar changes:
- o Removed antlr-2.7.5.jar (no longer used)
- o Removed concurrent.jar (no longer used)
- o Removed xml-apis.jar (no longer used)
- o Removed commons-logging-1.1.1.jar
- o Add slf4j-api-1.5.6.jar, slf4j-log4j12.jar
- o junit.jar upgraded to junit-4.5.jar
-
-Deprecation clearance.
- Methods, classes and statics marked deprecated in previous releases have been largely removed.
-
-ObjectF deprecations
- The interface ObjectF and the methods that use it have been deprecated.
- They are hangovers from before the time RDF acquired typed literals;
- nobody should be using them any more. They will be removed in the next
- Jena release.
-
-Reasoners:
- o Extended validity checking for OWL configurations to include
- datatype range checking that can cope with user datatypes as done for RDFS.
- Places the culprit Triple as the ValidityReport.Report extension value.
- o Removed deprecated reasoners (RDFS1, RDFSExpt, OWLExpt) and associated support.
- o Fixed RDFS-SIMPLE to include properties being reflexive
- o Removed DIG 1.0 reasoner interface
-
-JenaModelSpec:
- o This long-deprecated vocabulary (which supported the old ModelSpec system)
- has been removed. ModelSpec was replaced by Assembler long ago.
-
- o some Maker methods that implicitly relied on this vocabulary (but which
- had not been deprecated) have also been removed. This affects GraphMaker,
- SimpleGraphMaker, BaseGraphMaker, and ModelMakerImpl: the methods used
- were getMakerClass, augmentDescription, addDescription, and
- getDescription. These methods were only relevant to uses of the old
- ModelSpec system, which was removed in previous releases.
-
-N3:
- o N3 parser is now the Turtle parser.
- The dialect of N3 that Jena supported (which is quite old) has some idioms that
- are not legal Turtle although they are nowadays not correct N3 either.
- o Turtle writer now speeded up when many namespaces in use.
-
-Ontology API:
- o Removed support for no-longer-supported OWL syntax checker.
- o Removed previously-deprecated namespace management methods from OntDocumentManager.
- Namespaces should be managed via the PrefixMapping API.
- o Unified code for checking individuals in OntResource.isIndividual() and
- OntModel.listIndividuals(), and updated the check to account for some user-reported
- edge cases.
-
-Datatypes:
- o Added a getTypeByClass call to TypeMapper and registered additional primitive types
- based on suggestion and patch supplied by Thorsten Moeller.
-
-==== Jena 2.5.7
-
-Database layer:
- o Compatibility fixes for PostgreSQL 8.3 (no schema changes)
- o Compatibility fixes for MS SQL Server 2008 (no schema changes)
-
-ReasonerFactoryAssembler
- o Now takes note of ja:schema properties & binds such a schema
- to the reasoners that the factory produces.
-
-Reasoners:
- o Modified @include processing and Rule.rulesFromURL to support
- FileManager redirects.
- o Fixed problem withLPTopGoalIterator which caused one extra level
- of lookahead in some circumstances.
-
-==== Jena 2.5.6
-
-Typed literals:
- o Fixed bug in retrieval of decimal values with trailing fractional
- zeros by canonicalizing values after parsing and improving
- canonicalization.
-
-==== Jena 2.5.5
-
-Jar changes:
- o lucene-core has been upgraded from lucene-core-2.0.0 to lucene-2.2.0.jar.
- o logging-commons-1.1.jar upgraded to logging-commons-1.1.1.jar
-
-Deprecated Method Removal Frenzy
- o A bunch of methods in Resource, Model, ModelCon, and Statement have
- for a while now been marked as deprecated. These methods took literal
- values for the object of a statement and used the old treat-it-as-a-string
- approach rather than the newer typed-literal approach.
-
- o These deprecated methods have been removed. However, to allow migrators
- an easier life and to avoid requiring many many calls of
- createTypedLiteral (which needs a model, not always conveniently to hand),
- typed-literal versions of these methods have been added to the interfaces.
-
- o Model has grown:
-
- * addLiteral( Resource, Property, Various )
- * listResourcesWithProperty( Property, Various )
- * createLiteralStatement( Resource, Property, Various )
- * listLiteralStatements( Resource, Property, Various )
- * listResourcesWithProperty( Property, Various )
-
- o Resource has grown:
-
- * addLiteral( Property, Various )
- * hasLiteral( Property, Various )
-
- o Statement has grown:
-
- * changeObject( Various )
-
- where `Various` is one of float, double, long,boolean, char, short,
- int, or object [short & int may be missing in some cases].
-
- o As signalled in the Jena 2.5 release, the dedicated DAML API has
- now been removed. Users who still process DAML+OIL ontologies should
- switch to using the generic OntModel with the DAML profile.
-
-PrefixMapping loses deprecated usePrefix
- o The deprecated PrefixMapping method `usePrefix` has been removed.
- (Use `shortForm` instead.)
-
-ModelSpec
- o ModelSpec, which has been deprecated for a while, has been removed,
- along with ModelSpecImpl. OntModelSpec, which used to implement
- ModelSpec (via OntModelSpecObsolete, also removed), doesn't any more.
-
-RDF/XML-ABBREV
- o Aesthetic improvement changes most RDF/XML-ABBREV output.
- The treatment of URI nodes and blank nodes has been made more uniform.
- Please give feedback if for your data this is inappropriate.
-
-Schemagen
- o Member variables (i.e. for classes, properties and individuals) are now
- lexically sorted to create more stability in change-sets when generated
- Java classes are placed under source code control.
- o Now tries to guess the namespace of an ontology file in the absence of
- specific hints (hints such as: an option, prefix binding for the empty
- prefix, or an owl:Ontology or daml:Ontology resource). The algorithm
- is to look for the most prevalent URI that is not owl:, rdf: etc.
- o Added a new option, --includeSource, which causes schemagen to include
- the source code of the input ontology into the generated Java object.
-
-Ontology API updates
- o Added methods to Individual to allow testing and manipulation of the
- classes to which an individual belongs.
- o Added a method to OntProperty to list the Restrictions that mention
- the property
- o Clarified the contract of OntModel.listOntProperties, and added a new
- method OntModel.listAllOntProperties that is more robust in the
- absence of a reasoner, but potentially slower.
- o Added a utility class of basic ontology manipulation tools,
- com.hp.hpl.jena.ontology.OntTools. So far, this includes:
- * compute the lowest common ancestor of two classes
- * compute minimum path between two nodes
- * compute the named roots of a class hierarchy
-
-Reasoners:
- o Changed the equality contract of Rule objects to include the rule name.
- o Extended OWLMicro to include support for hasValue to match documentation.
- o Fixed problem with OntModel.getDeductionsModel not always triggering prepare.
- o Fixed problem with reasoner configuration when using typed literals
-
-Change of Behaviour in getByte/Short/Int on typed literals
- o The methods getByte(), getShort(), and getInt(), when applied to
- typed literals whose value is out of range, delivered a truncated
- value [as opposed to those methods on plain literals, which throw
- an exception]. This was deemed to be an error, and those methods
- will now throw IllegalArgumentExceptions.
-
- o These methods may be deprecated & removed in later releases.
-
-Assemblers
- o When the Assembler system (ie AssemblerGroup et al) load a class,
- that class is loaded before any of the implementsWith of that
- group are consulted. Thus the loaded class can update existing
- groups inplementsWith tables.
-
- o Because such an update need not be done to the global group
- Assembler.general, the group loading the class must be
- available, so using a static initialiser isn't sufficient.
-
- o Instead, if the class has a (static) method called
- `whenRequiredByAssembler` with an AssemblerGroup argument,
- that method is called passing in the loading AssemblerGroup.
-
- o Users are discouraged from using static initialisers which
- update Assembler.general, since this will not work as
- they might have expected when other assembler groups are
- being used.
-
- o OntModelSpecs have acquired an optional likeBuiltinSpec property
- for defining the "base" on which an OntModelSpec can be built by
- specifying other properties. In particular, this allows OntModelSpecs
- /with no reasoning/ to be constructed, which was a mysterious absence
- in earlier assemblers.
-
-Typed literals:
- o Fixed bug in equality checking of instances of xsd:decimal,
- and creation of xsd:decimal or xsd:integer instances from BigDecimal
- or BigInteger instances
-
-Reasoners:
- o Changed the equality contract of Rule objects to include the rule name.
-
-==== Jena 2.5.4
-
-Graph
- o Graph has grown a new method, getStatisticsHandler
- o null is an acceptable result
- o Graphs that extend GraphBase have this defined for them
- o It is intended for the ARQ optimiser optimising in-memory graphs
-
-Assemblers
- o ContentAssembler now respects the fileManager property for external
- content.
- o withFullModel now does subclass closure (as it should have done)
- o the code for findSpecificType is now better
-
-N3 & Turtle Writer
- o Correct writing nested bnodes with no properties to make strict Turtle.
- o No longer automatically add default namespace prefix.
-
-Reasoners
- o Added rebindAll() to FBRuleInfGraph
- o Extended OWLMini/full to include validation of maxCardinality(1) over individuals
- o The (forward) deductions model is now preserved across rebind() operations
- so that listeners on deductions models are safe
- o added now(?x) builtin to return the current xsd:dateTime
-
-Misc
- o Extended ResourceFactory.createTypedLiteral to handle Calendar objects
-
-==== Jena 2.5.3
-
-Statement.Util
- o The interface Statement has grown a Util class containing
- the three constants getSubject, getPredicate, and getObject,
- which are Map1 objects with the obvious meanings.
-
-Assembler modification-and-fix
- o There was a bug with the behaviour of OntModelSpec assemblers.
- The "feature" that if a ja:OntModelSpec had no properties it
- was treated as an OntModelSpec constant name did not work,
- since the generalAssembler obligingly inferred properties for
- it.
-
- The behavior has changed (I hope for the better ...). Instead,
- an OntModelSpec is built by modifying the properties of [a copy
- of] a default spec, which is normally OWL_MEM_RDFS_INF. However,
- if the root resource is ja:, then
- that constant provides the default. This gives the intended
- behaviour of "I specified no properties" and additionally
- allows specs to be specified as tweaks-to-a-constant.
-
- ContentAssemblers can now be constructed with default FileManager
- arguments. External content is loaded using that FileManager.
- The object of an externalContent statement can be a string
- literal or a URI.
-
- AssemblerGroups (which includes the instance Assembler.general)
- now support a `copy` method which makes a new group initialised
- with the same mapping.
-
- ReasonerFactoryAssembler now respects JA:reasonerClass properties,
- which specify the factory by giving its class name.
-
-ModelSpec and family REMOVED
- o the long-obsolete and recently-deprecated ModelSpec has been removed,
- along with its related tests, descendants, registry, and references.
- OntModelSpec remains (but is no longer a ModelSpec), with the old
- ModelSpec-oriented constructors deprecated and new Assembler statics
- introduced.
-
- OntModelSpec now extends OntModelSpecObsolete, which holds (almost)
- all of the OntModelSpec's deprecated ModelSpec-oriented machinery.
- OMSO will vanish after the next Jena release, when the remaining
- ModelSpec code will evaporate.
-
-Deprecated XSD.NS in favour of XSD.getURI() which reports namespace with a
- trailing # character.
-
-Reasoners:
- o Extended rule syntax to accept URIs wrapped in <..> and file: uris
-
-Typed literals
- o Fixed problem which prevented Calendars being used to instantiate
- XSDDates rather than full XSDDateTimes
-
-IRI
- o Fixed conformance problem with mailto:
- o Rewrote relativize code, fixing a #frag bug.
-
-==== Jena 2.5.2
-
-RDQL support now in ARQ
- o Removed old RDQL engine
- Migrate to SPARQL, use ARQ instead and package com.hp.hpl.jena.query
- http://jena.sf.net/ARQ/ -- ARQ also supports RDQL as a legacy language
-
-RDFException removed
- o Removed RDFException. It was obsolete (and recently deprecated). It's
- subclasses are now subclasses of JenaException.
-
-rdfcat tool
- o The handling of command line arguments to rdfcat has changed slightly.
- The input format (N3, RDF/XML, n-triple) argument, -n, -x and -t
- respectively, now *only* affect the following argument. In previous
- versions, the format was 'sticky' in that it persisted until another
- input format argument was supplied. To set the default input format,
- a new parameter -in has been added. See the rdfcat Javadoc for full
- details.
-
-==== Jena 2.5.1
-
-Fix for test.bat not setting the classpath correctly.
-Fix for PropertyNotFoundException from OntResourceImpl.getPropertyValue, not a null return.
-
-==== Jena 2.5
-
-ModelJEB is no longer supported
- o The ModelJEB BDB storage for graphs is no longer supported. (It
- has been unofficially unsupported for quite a while without
- complaint.) It's job -- cheap-and-cheerful persistent models -- has
- been taken over by HSQLDB.
-
-RDFException has been deprecated
- o It has been obsolete [because of the JenaException revisions] for
- yonks. No-one should be using it (other than some internal uses
- to be excised.) It will be removed post-2.5.
-
-Single-Element Unions [hence, OntModels]
- o If a union graph contains a single element, both find and queryHandler
- delegate directly to that single element. This means that find does
- not need to remember all the found elements to discard duplicates,
- and query gets to use any specialised code for that single graph
- (rather than the general nested-finds that a full union needs).
- In particular, if that single graph is a database graph, it will
- be able to use fastpath.
-
- A specific use of this is an OntModel with no imports in a database.
- (If it has imports, then this optimisation cannot apply.)
-
- In case something breaks as a result, the optimisation can be
- disabled by setting the system property jena.union.optimise to
- anything other than "yes".
-
-Reification with Inference Models
- o A partial fix to a reported bug with reification on inference models.
- Previously the built-in inference models had the reifier of their
- base model, and hence deduced triples did not contribute to the
- ReifiedStatement API calls, so deduced reified statements were
- invisible. Now those inference models have their own reifier and
- the reified statements of an inference model are the reified
- statements of the base model and of the deductions model.
-
- Reified statements inferred by backwards inference are /not/
- reported. Reified statements formed from fragments spread between
- the base model and the deductions model are /not/ reported.
- Implementing those reifications would be expensive both for
- the implementors and for any users.
-
-PropertyImpl.getOrdinal() [internal]
- o now computed on demand rather than on construction. Also log-on-too-big
- replaced by throw-an-exception. I doubt very very much that this will
- ever matter. There was no TestPropertyImpl code, but there is now. It
- should be extended to cover the "illegal URI detection", or perhaps we
- should remove that since it's RDF/XML-specific and checked by the XML
- output code anyway?
-
-Turtle support
- o New Turtle parser (uses javacc, not antlr). Accessed for files with extension ".ttl"
- and language names "TTL", "TURTLE", "Turtle"
-
-Database backends:
- o Changed MS SQL Server driver to use NVARCHAR instead of VARCHAR,
- to fix i18n support.
- o Fixed i18n bug in Oracle backend and also switched to NVARCHAR.
- o No requirement to compile Jena for Oracle use. The Oracle driver
- now works in the standard Jena distribution.
-
-ModelFactory:
- o added methods to construct models using Assembler system. Updated
- ModelFactory howto to include them. Deprecated the ModelSpec
- methods. Removed them from ModelFactory howto.
-
-ModelSpec:
- o deprecated the ModelSpec class. Modified the ModelSpec docs to
- say it's deprecated.
-
-Assembler:
- o missing subclass declarations added to vocabulary. Bogus `domain`
- replaced by correct `range`. FileManagerAssembler now by default
- builds a FileManager with the standard locators. To test this,
- made the existing standard locator classes have appropriate
- .equals() and .hashCode() methods.
-
- o ConnectionAssembler now records subject resource in
- ConnectionDescription. ConnectionDescription falls over
- usefully if the URL or type is null when it's opened.
-
-RDF/XML output:
- o bug whereby a literal containing the sequence "]]>" generated illegal
- XML (because the "]]>" went through unchanged, and is illegal except
- as CDATA end) fixed by converting any ">" in element content into
- ">".
-
- o unencodable characters (any control character except \n, \t, \r, also
- \uFFFF and \uFFFE) now throw exceptions. We may allow a new parameter
- setting to switch this off later.
-
- o the entity-conversion code has been rewritten using Java regex's to
- avoid multiple conversions.
-
- o the new Writer property "showDoctypeDeclaration" is used by the RDF/XML
- writers to force inclusion of a !DOCTYPE header declaring as entities
- each of the prefix-mappings of the model. Attributes that start with
- the URIs of the mapping are written to use the entity invocation
- instead.
-
-RDF/XML Input:
- o When reading from a URL, content negotiation is now used
- prefering: RDF/XML, then XML, over other content types.
- o An over-eager and confusing warning was suppressed.
- o Behaviour on very large files has been modified. For such files, checks
- for illegal reuse of rdf:ID are not made after the first 10000
- have been seen. A warning is issued about this change when
- reading a large file.
-
-IRI:
- Release synchronized with an IRI release (iri 0.3).
- Minor bug fixes.
-
-Untyped literal operations deprecated (char, boolean, long, float, double, Object)
- These operations do not work on typed literals. They converted their arguments
- to plain strings and parsed strings to retrive values.
- They are a ghost from pre-datatyping RDF.
- ** The forms on RDFNode, String, String+lang, and XML literals remain.
- ** Forms taking a typed literal or a lexical form and RDF datatype added (or already exist).
- Model: .add, .listStatements, .listSubjectsWithProperty
- Resource: .hasProperty, .addProperty
-
-JAR file changes:
- Added: arq-extra.jar
- Changed: wstx-asl-2.8.jar ==> wstx-asl-3.0.0.jar
-
-EnhNodes/UnsupportedPolymorhismException/QualifiedRestriction
- o Any EnhNode now automatically canAs any type already on its
- view ring if that view is still isValid. Previously it
- required finding a relevant Personality and Implementation
- and trying canWrap, which failed if the EnhNode had no EnhGraph,
- eg a Resource allocated by ResourceFactory.
-
- o This exposed problems with the QualifiedRestriction family of
- classes, which didn't override isValid. They do now.
-
- o Attempting to .as() an EnhNode with no EnhGraph no longer generates
- a NullPointerException, instead giving an explicit
- UnsupportedPolymorphism exception.
-
- o Attempting to canAs an EnhNode with no existing implementation
- of the required type and no EnhGraph returns false.
-
-OntResourceImpl
- o some internal modifications to simplify/correct the code:
-
- - corrected uses of .remove duing an iteration
- - replaced some loops by existing single Graph methods
- - some uses of getRequiredProperty + exception handling
- (probably hangovers from the old days when getProperty
- threw an exception) replaced by uses of getProperty and
- tests for null
-
-ResourceImpl
- o implementation of removeAll simplified in two ways: uses the
- Model removeAll method, and no longer tests for an InfModel,
- since the InfGraph implementation of removeAll works on the
- base model anyway so no special-case code required. Also
- avoids iffy (but currently safe) use of remove(StmtIterator)
- where the StmtIterator is active over the same Model.
-
-Statement
- o the method getWellFormed has been deprecated in favour of a new
- method hasWellFormedXML(), paralleling Literal's newish
- isWellFormedXML().
-
-Model
- o Model has acquired a new method isClosed(), true iff the model has
- been .close()d
-
- o Graph ditto
-
- o The built-in Graph and Model implementations have been modified to
- implement this method.
-
-Typed literals
- o Fixed bug in serialization of xsd:duration types.
- o Added getBigSeconds to XSDDuration to avoid rounding errors in
- use of getSeconds.
-
-OntModel
- o the prefix mapping of an OntModel will not offer as a default URI
- the default URI of any non-base component. This change is
- implemented via Polyadic and hence any MultiUnion will have this
- behaviour.
-
- o added some extra checks so that importing a previously closed model
- URI no longer causes ClosedException (SF bug 1474220)
-
- o aliased method 'listImportedModels' to 'listSubModels' (the preferred
- name), and introduced a Boolean flag to allow the listed sub-models to
- be presented without their own import sub-models if required
-
-OntDocumentManager
- o added a 'load hook' so that custom behaviours can be executed just
- before and/or just after a URI is read in to the contents of an
- OntModel or one of its sub-models
-
-BooleanClassDescription (UnionClass, IntersectionClass, etc)
- o canAs() checking in the language profile is now more liberal
- for OntClass resources, to prevent partially-formed class
- descriptions triggering a ConversionException (SF bug 1608765)
-
-Legacy DAML API
- o This entire API has been deprecated, and will be removed in Jena 2.6.
- Ontology users are encouraged to work with OWL rather than DAML+OIL,
- however the DAML+OIL profile for OntModel will continue to work
- and be supported. The deprecated API is in the package:
- com.hp.hpl.jena.ontology.daml
- and was originally provided to ease transition of existing code
- from Jena 1 to Jena 2. This transitional capability is no longer
- supported.
-
-Reasoners
- o Fixed a ConcurrentModificationException bug in TransitiveReasoner
- o added simple string hanlding builtins regexp, strConcat, uriConcat
- o fixed bug with arithmetic builtins which caused matching against an
- already bound but incorrect solution to pass.
- o fixed bug in backward reasoner which caused to try redundant solutions
- to purely grounded calls. This may improve efficiency in some cases.
- o fixed bug in duplicate removal in GenericRuleReasoner in pure backward mode
- o fixed bug in parsing of typed literals to handle user defined prefixes
- (thanks to Steve Cranefield for the fix)
- o removed MAX_VARS limit from the old forward rule engine
-
-==== Jena 2.4
-
-JAR file changes:
- Added: wstx-asl-2.8.jar
- json.jar
- iri.jar
- Removed: stax-1.1.1-dev.jar
- jakarta-oro-2.0.8.jar
-
-
-N3/Turtle:
- o The syntax form 2.3 is now treated as an XSD decimal, not an XSD double
- This is inline with chnages to N3 and Turtle specifications and
- compatible with SPARQL.
-
-General
- o Fixed various security violations which prevent use within Applets
-
-Model
- o Model[Con] has acquired a new method, remove(S, P, O),
- which removes the triple (S, P, O) from the model. None
- of S, P, O can be null. O is of type RDFNode (no overloading
- for primitive types etc).
-
-ExtendedIterator [hence NiceIterator and ResultSetIterator]
- o added toSet() and toList(), which bundle the [remaining]
- elements of the iterator up as a Set or List. Testing
- these is a bit tricky; for the moment I've cheated by
- testing specific instacnes of WrappedIterator, which is
- built on NiceIterator, and NiceIterator is the base class
- for /almost/ all the ExtendedIterators in the system,
- except ResultSetIterator for which I added special code.
-
-Node
- o added getBlankNodeLabel() as shortcut to the label string
- of a blank node.
-
-AnonId
- o added getLabelString(), which delivers the label of this
- AnonId and (unlike toString()) is guaranteed to do
- /only/ that.
-
-GraphMatcher
- o following up on a performance issue, replaced the idiom
- find().hasNext() for containment in GraphMatch with proper
- use of contains(). After fixing a hashing problem in
- LiteralLabel [see below], this worked and sped up the
- checker /lots/ in some circumstances.
-
-NEW: Assembler specifications
- o Assembler specifications replace the old ModelSpecs. They are
- documented in doc/assembler/index.html and linked pages.
-
-JenaTestBase
- o Added new test method assertInstanceOf to do instanceof testing
- with a nice failure message; updated (some) test code to use it.
-
-FileGraph[Maker]
-  o The recent change to FileGraph which meant that closed FileGraphs
- were removed from their owning Maker also meant that they were
- not deleted when the Maker was closed. Amongst other things, this
- left crud in the tmp directory when running the tests. Fixed: the
- Maker remembers /all/ the FileGraphs, even the closed ones.
-
- o Added a constructor that allows the file language to be explicitly
- specified.
-
-LiteralLabel
- o reported bug whereby a LiteralLabel can be constructed with the
- value part null has been fixed [by putting in the missing 'this.']
- o discovered a problem with literal labels whose values are arrays:
- hashing doesn't work. Added delegation so that an RDFDataType
- can provide a specialised hash function and arranged that the
- relevant XSD datatypes used it.
-
-Database:
-  o Added a driver for MS SQL Server 2000/MSDE 2000.
- o Suppressed the "reifier must fix over-specification" messages from tests.
-
-Reasoners:
- o Fixed bug in the forward rule engine which prevented firing of axiomatic
- rules with non-empty bodies (i.e. no pattern match but calls to builtins).
- o Changed contract for TransitiveReasoner so the same reasoner can be bound
- to multiple datasets without interference between them
- o Added error check for backward rules with multiple head clauses
-
-OntModelSpec:
- o Changed contract for reasoners so that if a ReasonerFactory is supplied then
- the generated reasoner instances won't be cached. This allows the same spec
- to be reused multiple times with reasoners which can only be bound once.
-
- o OntModelSpecs now export access to their to-be-deprecated ModelMakers via
- the importModelGetters. This is part of the move to using Assemblers.
-
-OntDocumentManager
- o When the OntDocumentManager asks for a model for an import, it goes via
- the OntModelSpec's ModelGetter. Since the default ModelGetter just wraps
- the spec's ModelMaker, this doesn't change the usual behaviour, but it
- allows the OntModelSpec to use weak Getters.
-
-Jar changes:
- o stax-1.1.1-dev.jar ==> stax-1.1.2-dev.jar
- o new iri.jar required
-
-==== Jena 2.3
-
-ARQ - SPARQL for Jena
- o ARQ added to Jena
- See doc/ARQ/index.html for details
-
-JAR changes
-
- jena.jar has been split into jena.jar and jenatest.jar (the test packages).
-
- Replaced and upgraded with name changes:
- antlr.jar => antlr-2.7.5.jar
- jakarta-oro-2.0.5.jar => jakarta-oro-2.0.8.jar
- log4j-1.2.7.jar => log4j-1.2.12.jar
- icu4j.jar => icu4j_3_4.jar
-
- Xerces jars updated to Xerces 2.7.1
-
- New Jarfiles:
- arq.jar
- jenatest.jar
- stax-1.1.1-dev.jar
- stax-api-1.0.jar
-
-Constraint rewriter
- o the .graph.query.Rewrite class recognises certain RDQL regex
- idioms and rewrites them. The rewritten expressions contained
- errors: (a) the case-insensitive classes stored the lowercased
-   version of the test string, which broke the RDB-generated code;
- (b) against all his principles, kers had used toString() on
- the results of expression evaluation, which broke the comparison
- on typed or languaged literals. These Have Been Fixed.
-
-PrefixMappings
- o new boolean method samePrefixMappingAs(PrefixMapping) compares mappings
- for equality but has opportunity to avoid creating intermediate
- maps.
-
- o NOTE: cannot overload .equals() because Model::equals() is already
- defined. Making Model implement PrefixMapping may have been a
- mistake ...
-
-ARP
- o Total rewrite of internals. Now approx four times faster.
- User code will experience significantly less speed up,
- depending on the percentage of runtime taken up with
- parsing (as opposed to reading the data from the network
- or disk, and adding the triples to a Model).
-
- o The contract concerning behaviour after syntax errors has
- changed. (See Javadoc for ARPOptions#setErrorMode, for details
- of the new contract)
-
- o Some changes in the error codes produced for ill-formed
- input.
-
- o The treatment of interrupts has changed. Instead of
- throwing an InterruptedIOException, an error
- is produced: ERR_INTERRUPTED, and reported through
- the error handler as a fatal error. This normally
- throws a ParseException.
-
- o A few public classes that were in the old ARP package, but
- labelled as not part of the API have been removed.
-
- o DOM2Model public constructors have been deprecated, and
- replaced with factories.
-
- o SAX2Model and SAX2RDF factories methods have been deprecated and replaced.
-
- o SAX2RDF, SAX2Model protected constructors have changed in a not
- backwardly compatible fashion.
-
-  o NTriple command-line option documentation changed, in a manner
- that is theoretically not backwardly compatible.
- -r is now default (in documentation,
- follows previous implementation). -R option added to cancel -r.
-
-Node/Node_Literal/Literal
-
- o Node has gained the method getIndexingValue(), which is the
- value to use when indexing this Node for GraphMem lookup (and
- other such things). Non-literal nodes return themselves. Literal
- nodes return an appropriate value; the current implementation
- defers to the getIndexingValue() method of the associated
- LiteralLabel.
-
- o Node has gained the method getLiteralValue(), which fails if
- the node is not a Node_Literal and otherwise returns the
- value of the associated literal. This method allows uses of
- getLiteral().getValue() to be replaced, so that external
- code need not know about getLiteral() as much.
-
- o Literal::getWellFormed() has been deprecated; it is replaced
- by Literal::isWellFormedXML(). There is a missing API method
- eg isWellFormed() which would apply to any typed literal;
- this will arrive in due course.
-
- o Support for indexing of typed literals added. NodeToTripleMaps now
- use an indexing object to represent the Node rather than the Node
- itself. That object should implement the appropriate semantic equality:
- index(x) == index(y) <=> sameValueAs(x, y)
- If future datatyping extensions can't meet this contract it could be weakened to:
- index(x) != index(y) => ! sameValueAs(x, y)
- at the expense of post-processing find results with a sameValueAs test.
- Currently the index objects used are:
- plain literals, no lang and xsd:string literals -> the lexical form
- plain literals, lang tag -> the Node
- XMLLiterals -> the Node
- known typed literals -> the java getValue() object
- unknown typed literals -> a cons of the lexical value and datatype URI
-
-RDFNode/Resource/Literal
-
- o The method Resource::getNode() has been deprecated in favour of
- RDFNode::asNode().
-
- o The method Resource::isAnon() has been moved up into RDFNode.
-
- o The (pointless) method Literal::isLiteral() has been moved up
-    into RDFNode (where it is pointful).
-
- o RDFNode has acquired `boolean isURIResource()` and `boolean
- isResource()`.
-
- o This allows all three what-kind-of-node tests to be applied to
- an RDFNode. Note that heavy use of these methods is a likely
- design smell - visitWith() may be a better solution to the
- classification problem.
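
As an illustrative sketch, the kind tests allow an RDFNode to be classified without casting (bearing in mind the design-smell caveat above):
```
import com.hp.hpl.jena.rdf.model.RDFNode;

public class NodeKindSketch {
    static String describe(RDFNode n) {
        if (n.isLiteral())     return "literal";
        if (n.isAnon())        return "blank node";
        if (n.isURIResource()) return "URI resource";
        return "resource";     // isResource() is true for both URI and blank nodes
    }
}
```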
-
-GraphMem/GraphTripleStore/NodeToTriplesMap
-
- o Performance analysis suggests that a chunk of the time in find() was
-    taken up with redundant comparisons when filtering the intermediate
- iterator with the triple pattern. [EG: the hashmap index field is
- always right; the ANY nodes are never relevant.] So, after a go or
- two around the houses, this was optimised to only test the non-wild
- remaining fields, using the new Triple.Field operations plus the new
- Filter operations.
-
- o The default memory-based graph is now GraphMemFaster, which does
- optimised query handling.
-
-Triple
-
- o S/P/O fields made final (dunno why they weren't already).
-
-  o Added Field class, which gives three constants (getSubject, getPredicate,
-    getObject) which have a getField(Triple) method to extract that field.
- Fields also have filterOn() methods to create filters over nodes and
- fields of triples; filtering over ANY nodes delivers any(), which
- composes cheaply.
-
-Filters & Iterators
-
- o Removed a performance infelicity in the default andThen implementation
- (it kept calling the left-hand hasNext even when it had been exhausted).
- Later replaced the implementation with one that keeps a list of
- pending iterators and itself implements .andThen() by extending the
- pending list.
-
-  o Added FilterDropIterator so that filterDrop doesn't need to create a
-    new negated Filter and suffer an extra indirection layer; added
-    FilterKeepIterator for symmetry.
-
- o Filter is now a class, not an interface, to make it easier to add
- new operations to it (otherwise the API changes would be grossly
- visible to all users).
-
- o Filter has new methods: .and(Filter), producing a Filter that passes
- only elements that pass both filters, and .filterKeep(Iterator),
- which filters the iterator in the same way as ExtendedIterator's
- filterKeep operation does.
-
- o any() has fast implementations for these operations, allowing
- it to be used as a fairly cheap identity element.
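
As an illustrative sketch, assuming the pre-generics accept(Object) signature and a static any() factory as described above (the sample values are invented):
```
import java.util.Arrays;
import java.util.Iterator;

import com.hp.hpl.jena.util.iterator.Filter;
import com.hp.hpl.jena.util.iterator.WrappedIterator;

public class FilterSketch {
    public static void main(String[] args) {
        Filter isString = new Filter() {
            public boolean accept(Object o) { return o instanceof String; }
        };
        // any() accepts everything and composes cheaply; .and() keeps only elements
        // accepted by both filters; .filterKeep() applies the result to an iterator
        Filter keepStrings = Filter.any().and(isString);
        Iterator it = keepStrings.filterKeep(
            WrappedIterator.create(Arrays.asList("a", null, Integer.valueOf(1), "b").iterator()));
        while (it.hasNext()) System.out.println(it.next());   // prints a, b
    }
}
```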
-
-ModelSpecs
-
- o Fixed a bug: the loadFile property did not work on inference models.
- The fix ensures that any descendant of ModelSpecImpl implements
- createModel() using a method doCreateModel() and then loading the
- specified files.
-
-Schemagen
-
- o Schemagen now by default includes individuals whose class is in one
- of the target namespaces for the document, even if the individual itself
- is not. This behaviour can be turned off with option strictIndividuals.
-
-Typed literals:
- o Fixed bug in unparsing of xsd:time values.
-
- o Added normalization step so that creating a typed literal from
- an XSDDateTime will use narrow types (e.g. xsd:date) when appropriate.
-
- o Fixed bug in sameValueAs when comparing an integer to a float/double.
-
-Reasoners:
- o Extended rule parser to support typed literals using N3 type syntax
- such as 'true'^^xsd:byte.
-
-  o Fixed bug with rule sets which include a preprocessing hook, to ensure
-    the hooks are rerun after new triple adds which should invoke the hook.
-
- o Fixed two bugs with derivation logging of backward rules.
-
- o Modified processing of non-monotonic rulesets (involving drop/remove) so
- that each entry in the conflict set is fired separately and all the
-    consequences propagated before attempting to fire the next rule.
- To avoid performance hits, rulesets not involving such operators
- execute as before.
- User defined Builtins which remove data should be marked as such
- using the isMonotonic method.
-
- o Fixed bug in TransitiveGraphCache which had resulted in some
- transitive reductions being incompletely reduced (i.e. some
- indirect property instances were being incorrectly reported as
- being direct).
-
- o Added "drop" operator as an alternative to "remove" when performing
- non-monotonic rewrites.
-
- o Fixed bug in rebind/reset of infgraphs which use TGC and failed to
- reset the transitive engine.
-
- o Optimized Resource.remove operations for the case where the parent model
- is an InfModel.
-
- o The default DIG reasoner used in the documentation examples has been
- changed from Racer to Pellet. Pellet is free and open-source, while Racer
- has switched to a commercial license model.
-
-Ontology API
- o OntDocumentManager now delegates file resolution and model caching to
- FileManager, which means that FileManager's resolution strategies can
- be used to locate ontology files (e.g. from the classloader).
- o Prefix mapping and ontology language selection in the OntDocumentManager
- has been deprecated, and will be removed in a future version of Jena.
-
-Command line utilities
- o New utility jena.rdfcat, which can merge any number of individual rdf
- documents together into one model, and perform syntax translation (e.g.
- RDF/XML to N3).
- o New query utilities for SPARQL
-
-ModelLock
- o Deprecated in favour of Lock (in shared).
- o Two implementations: LockMRSW and LockMutex
-
-==== End Jena 2.3
-
-
-MySQL
------
-There is a problem when using MySQL j-connector 3.1.* and MySQL 4.1.*.
-It manifests itself as truncation of long literals. Systems not using long
-literals should not see any problem. J-connector 3.0.* and the development
-versions of j-connector 3.2 do not exhibit the problem.
-
-Post 2.2beta change list
-
-
- ARP
- o Fixed XMLLiteral bug relevant to OWL-S
- o Added workaround for ICU bug.
-     The workaround may slow processing of Tamil and other languages
-     which use Unicode composing characters. If you are processing
-     large volumes of Tamil, using a patched version of icu4j may be
- faster. Ask on jena-dev for more information.
- o Improved character encoding support: all character encodings
- supported by Java are now supported by Jena.
-
- FileGraph and ModelMakerImpl
- o ModelMakerImpl now implements [the obsolescent] createModelOver
- using maker.openModel( ... ) rather than .createModel( ... ). This
- fixes a problem with existing files in the directory not being
- consulted for their contents.
-
- o FileGraphs now (weakly) support (non-nested) transactions, using
- checkpoint files to record their state at a begin() and restoring
- that state at an abort(). A commit() writes the current state to
- the backing file and deletes the checkpoint.
-
- InfModelBase (hence, any inference model)
- o Inference models now (weakly) support transactions; they delegate them to
- their base model. Additionally, an abort() will do a rebind().
-
- JMS & ModelSpec
- o Added new modelName property to the vocabulary and schema, ready
- to properly support named models as well as model makers.
-
- o The specification for RDB models has changed: it is not the maker,
-    but its /hasConnection/ value, that has the connection properties.
- (This allows the connection to be shared, or to be prespecified.)
-
- o The vocabulary class JMS has been renamed to JenaModelSpec. There
- is a (deprecated) JMS subclass to allow legacy use of the vocabulary.
-
- o OntModelSpec understands the `modelName` property; it gives the name
-    of the model (in the baseModelMaker) which is to be used in an
- ontModelSpec.create() call.
-
- OWL Syntax Checker
- o No longer in the Jena download.
- o A separate contribution.
-   o Can be separately downloaded from the Jena project files page.
-
- PrefixMappings
- o the requirement that adding `(prefix, uri)` to a prefix mapping remove
- any existing prefix for `uri` has been removed. Calls that run the
- mapping backward (eg qNameFor()) will get a correct answer, not
- necessarily the same one each time; if possible, it will be the
- "most recently bound" prefix. (The "not possible" cases are those
- where a prefix has been removed and the inverse mapping has been
- regenerated.)
-
- Ontology API
- o As part of a move to provide more consistent behaviour, listDeclaredProperties
- has been completely re-written. The new behaviour, which in some
- important respects differs from the old behaviour, is now documented
-    in doc/how-to/rdf-frames.html. There has also been a non-
- backwards-compatible change in the meaning of the Boolean flag
- passed to listDeclaredProperties.
- o An OntModel can now list the root classes in the local class hierarchy,
- see OntModel.listHierarchyRoots()
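
As an illustrative sketch of the new root-listing call (the ontology URL below is invented):
```
import java.util.Iterator;

import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.rdf.model.ModelFactory;

public class HierarchyRootsSketch {
    public static void main(String[] args) {
        OntModel om = ModelFactory.createOntologyModel();
        om.read("http://example.org/ontology.owl");   // hypothetical ontology URL
        // Roots of the local class hierarchy
        for (Iterator i = om.listHierarchyRoots(); i.hasNext(); ) {
            System.out.println(i.next());
        }
    }
}
```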
-
- DIG reasoner
-   o Fixed a bug that meant that the DIG top and bottom concepts were not
-     being translated to owl:Thing and owl:Nothing, and hence not appearing
-     in output
-
- RDF/XML Output
- o Improved character encoding support: all character encodings
- supported by Java are now supported by Jena.
-
-Post Jena 2.1 change list [up to 2.2beta]
-
- RDF API
- o Fixed bug in typed literals support which caused isValidLiteral tests
- to fail on user defined types derived indirectly from simple types.
- o Fixed bugs in conversion of Calendar to XSDDateTime
- o Fixed XSDDouble isValidValue test to check for Double
- o Fixed XSDbase64Binary, XSDhexBinary returning strings instead of byte[]
-
- o GraphExtract operations made available through ModelExtract plus
- StatementBoundary[Base] & StatementTripleBoundary.
-
- o Model has gained read(String url, String baseURI, String lang ).
-
- Database
- o Fixed user-reported problem where removeNsPrefix() wasn't persistent
- o handles some constraints internally
- o A minor change to ModelCom has improved the speed
- with which DB models with several prefixes are opened.
-
- GraphBase and Reification SPI
- o Reifier::getHiddenTriples() and getReificationTriples() REMOVED and
- replaced by iterator-returning find(TripleMatch) [for all quadlets],
- findExposed(TripleMatch) [for exposed quadlets, ie Standard], and
- findEither(TripleMatch, boolean showHidden) [for exposed or hidden
- quadlets].
-
- o Reworking of GraphBase to clean up reification and fix bug with
- duplicated fully-reified triples. GraphBase now implements find()
- by appending triples from reifier with triples from local triple
- store. find() is not over-ridable; over-ride graphBaseFind() instead.
- Similarly size -> graphBaseSize, contains -> graphBaseContains.
-
- o Reworking of SimpleReifier to express it in terms of implementations
- of a store for fragments and a store for complete triples, with new
- interfaces. This should allow implementors of persistent Graphs an
- easier time of it. [Driven by GraphBDB work, so there's an example to
- hand.]
-
- o Reifiers must also implement size[OfExposedTriples]() and close()
- methods.
-
- Model & Graph removeAll(), remove(S, P, O)
- o added new API operation Model::removeAll() which removes "all" statements
- from a model [currently there are issues about inference models]
- o added removeAll() to Graph BulkUpdateHandler
- o added Model::remove(S, P, O) which removes all statements matching
- (S, P, O) with nulls as wildcards
- o added BulkUpdateHandler.remove(S,P,O) removing triples matching
- (S, P, O), Node.ANY as wildcard
- o BulkUpdateHandler generates events for these
-
- o ModelFactory has gained createUnion(Model, Model) which creates a
- dynamic union of the two argument models.
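
As an illustrative sketch of the dynamic union, together with the new removeAll() (URIs invented):
```
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;

public class UnionSketch {
    public static void main(String[] args) {
        Model m1 = ModelFactory.createDefaultModel();
        Model m2 = ModelFactory.createDefaultModel();
        // Dynamic union: statements added to m1 or m2 later show through
        Model union = ModelFactory.createUnion(m1, m2);
        m1.createResource("http://example.org/x")
          .addProperty(m1.createProperty("http://example.org/", "p"), "v");
        System.out.println(union.size());   // 1
        m1.removeAll();                      // empty m1; the union reflects that too
        System.out.println(union.size());   // 0
    }
}
```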
-
- o The class GraphExtract and its related interface TripleBoundary
- have been created to allow the extraction of rooted subgraphs
- from a graph, terminating at triples satisfying some boundary
- condition.
-
- GraphMem, SmallGraphMem
- o GraphMem has had the redundant Set(Triple) excised, and changes made
- to NodeToTriplesMap to push the triple-matching inwards and simplify
- GraphMem's find code. It will use a little less memory and should be
- a tad faster.
- o a new memory-based Graph, SmallGraphMem, has been introduced. This
- *only* holds a Set(Triple); no indexing is done. Hence it is
- unsuitable for graphs with more than "a few" statements, unless
- memory footprint is (much) more important than search speed. It
-      is primarily an extreme case to compare other Graphs against.
-
- Graph Capabilities gains findContractSafe()
- o used to tell prettywriter that its use of find() works, otherwise
- it falls back to the ordinary writer.
-
- Graph Query handling
- o Query now rewrites (some) RDQL pattern matches which are equivalent
- to startsWith, endsWith, or contains to use new Expression nodes
- with labels J_startsWith, J_endsWith, J_contains, to allow back
- ends to optimise those.
-
- RDQL
- o Added langeq operator
-  o Removed ResultBinding.getValue() (which was an internal use
-    operation) as part of a move to more directly using the graph-level
-    query handling.
- o ResultBinding becomes an interface. See also ResultBindingImpl.
-
- Event handling
- o added new graph.GraphEvents as holder of event constants with one
- such, .removeAll, issued by BulkUpdateHandler for removeAll(), and
- a static method for creating removed-by-pattern triple values.
- o the standard Jena readers generate startRead and finishRead events
- o all the Graph events now come with their source Graph attached as an
- argument to the event method.
- o added test case and fixed resulting issues to ensure that .remove()
- on the iterator from find() generated a notifyDeleteTriple event.
-
- Reasoners
- o Changed processing of PROPruleSet on GenericRuleReasoners to accept
- multiple rulesets and merge them
- o Added support for @prefix and @include to the Rule parser utility
- o Suppressed internal properties (rb:xsdRange etc) from leaking out of infmodel
-  o Fixed a space leak in the backward chainer
- o Added subProperty inheritance of domain/range values
- o Fixed validation of owl Functional properties to handle literals
- o Changed validation report of un-instantiatable classes to be warnings rather
-    than errors; the report name is "Inconsistent class"
- o During validation the culprit resource is now made available via the error
- report's getExtension method
- o Fixed bug in backward reasoner which caused it to occasionally miss
- some results in non-deterministic ways
- o Fixed performance problem with listing all triples in a transitive reasoner
- o Fixed bug 927644, mishandling of cycles in transitive reasoner
-  o Fixed omission in handling of someValuesFrom applied to an rdfs:Datatype.
- o Fixed bug in hide() table not being carried across from prebuilt
- reasoner caches, which resulted in the prototypical instance of
- owl:Thing being visible in listIndividuals.
- o Fixed bug in TransitiveReasoner reported by wangxiao
- o Changed delete for RETE reasoner to be non-incremental to work around
- bug without demanding full reference counting
- o Added experimental OWLMini and OWLMicro reasoners
-
- o [kers] Added WrappedReasonerFactory class and RuleReasoner interface.
- Some tweaking to "implements" clauses. Refactored out some setRules
- code into FBRuleReasoner and BaseRuleReasonerFactory, to make it easy
- to share code for ModelSpec's reasoner specs.
-
- ModelSpecs
- o The modelspec language has been extended. Reasoner specs can now
- specify multiple rulesets by URL or by literal strings. Schemas
- may also be specified by URL.
-
- o Internal refactoring has cleaned up the API somewhat, and there
- is, optionally, config information read from
-
- etc/modelspec-config.n3
-
- At the moment, this only allows the Java class that implements
- a given jms:ModelSpec type to be specified.
-
- o The JMS (Java Model Spec) vocabulary class has had its schema
- extracted - it is now loaded from vocabularies/jena-model-spec.n3.
- That vocabulary element is now added to jena.jar (as are some other
- vocabulary elements used by the system).
-
- PrefixMappings
- o Standard no longer contains vcard, jms, or rss.
- o Extended introduced = Standard + vcard, jms, rss.
- o usePrefix deprecated - use shortForm instead.
- o qnameFor added; result is legal qname or null.
- o w.withDefaultMapping(x) added, which adds mappings from x which
- don't clash with those already in w.
- o the restriction that namespaces must not end with name characters
- has been removed.
-
- Exceptions
- o added WrappedException extends JenaException for wrapped exceptions
- o added WrappedIOException extends WrappedException
-
- Node
- o Node cache hash table replaced by specialised implementation
- o new Node methods getNameSpace, getLocalName, hasURI(String)
- o minor adjustments to Node.create(String) to allow for specifying
- language or type for literals, use of default prefix, and
- elimination of the nn-NN hack for numeric literals.
- o default Node.toString() uses quoting and @/^^ for literals
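
As an illustrative sketch of the new Node methods and the literal toString() behaviour (URIs and values invented):
```
import com.hp.hpl.jena.graph.Node;

public class NodeSketch {
    public static void main(String[] args) {
        Node n = Node.createURI("http://xmlns.com/foaf/0.1/name");
        System.out.println(n.getNameSpace());   // http://xmlns.com/foaf/0.1/
        System.out.println(n.getLocalName());   // name
        System.out.println(n.hasURI("http://xmlns.com/foaf/0.1/name"));   // true

        // Literal toString now uses quoting and @lang / ^^type
        Node lit = Node.createLiteral("chat", "fr", false);
        System.out.println(lit);                 // "chat"@fr
    }
}
```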
-
- Triple
- o Triple.create now uses a cache
-
- Resource
- o new method hasURI(String)
-
- Ontology API
- o Added a new method getOWLLanguageLevel() to OntModel, which returns the
- OWL sublanguage - lite, DL or full - and error messages
- o Fixed ont language profiles to allow .as() on e.g. owl:SymmetricProperty as
- OntProperty.class. Previously this relied on the reasoner; the change was
- needed to support the DIG interface.
- o OntModels created over existing base models don't include elements
- from their document manager's PrefixMapping if they would clash with
- those already existing in the base.
- o Fixed bug 985258 - schemagen can now create RDFS vocabularies
- o Fixed bug 940570 - solved a problem with listIndividuals when using
- the default OWL reasoner
- o Fixed bug 948995 - .as() on owl:InverseFunctionalProperty for
- datatype properties failing
- o Various fixes to prevent cycles in the graph confusing listSubClasses,
- listSuperClasses, etc
- o fixed profiles to allow owl:Thing .as(OntClass.class) even if no
- reasoner present
- o In response to bug 1065103, DAML models now by default use rdfs:subClassOf,
- rdfs:subPropertyOf, rdfs:range and rdfs:domain in preference to their daml:
- equivalents. The old (Jena 2.1) behaviour is available by switching to the
- DAML_OILLegacyProfile in the OntModelSpec. The new version more closely
- matches what typical DAML ontologies do.
- o Added createIndividual() to OntClass
- o Added listDataRanges() to OntModel
-
- DIG reasoner interface
- o Various bug fixes, plus a significant performance fix for listIndividuals()
-
- OWL Syntax Checker
- o Added support for OntDocumentManager
- o Improved command line, supports N3 etc, OntDocumentManager
-
- File Utilities
- o ModelLoader retired by deprecation.
- Use FileManager instead, specifically, FileManager.get() for the
- global FileManager (or create your own).
- Create new model with FileManager.get().loadModel
-      Read into an existing model with FileManager.get().readModel (see the sketch after this list)
- o In schemagen, inference is now *not* used by default on input models; a
- new option --inference has been added to allow inference to be turned on.
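
As an illustrative sketch of the FileManager calls mentioned above (the file names here are invented):
```
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.util.FileManager;

public class FileManagerSketch {
    public static void main(String[] args) {
        // loadModel() creates a new model from a location
        Model m = FileManager.get().loadModel("file:data.rdf");
        // readModel() adds the contents of a further location into an existing model
        FileManager.get().readModel(m, "file:more-data.rdf");
        System.out.println(m.size());
    }
}
```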
-
- Creating sets/maps
- o util.CollectionFactory has static methods for creating hashed Maps
- and hashed Sets. These are used in the internals to allow the
- implementing classes to be changed (eg to use the trove library).
- Non-hashed-collection create methods may follow.
- o This was initially called HashUtils; that class remains, with the
- initial method names, but it is deprecated and will disappear post
- J2.2.
-
- ARP
- o New support for non-Xerces SAX sources.
- o Support for DOM sources (Java 1.4.2 and later).
- o ARP setup rationalized, a few methods deprecated as a result.
- o Improved documentation, covering new features, (see doc/ARP)
-    o *Removed* StanfordImpl, assuming no one uses it. Please
- complain (jena-dev@yahoogroups.com) if assumption was false.
-
- Utilities
- o new class IteratorCollection with methods iteratorToSet and
-      iteratorToList (heavily used in tests and useful in general)
-
- N3
- o Resolve relative URIs if there is a base.
-
-
-
-
-Jena 2.1 - Main changes from Jena 2.0
- Installation
-    The names of some jars have changed; be sure to update your classpath.
-    The names of the Xerces jars are now: xercesImpl.jar and xml-apis.jar
- We also require Jakarta commons logging: commons-logging.jar
- See readme.html for the full list of jars.
-
- OWL Syntax Checker
- Major update from alpha version (Jena 2.0), to production
- version (Jena 2.1).
- API created.
-      Many-fold performance improvement (orders of
-      magnitude)
- Now conformant with OWL Recommendation.
- Streaming mode added, suitable for lower memory
- environments or large input.
- Command-Line jena.owlsyntax program added.
- Still to do: better error messages.
-
- RDF/XML-ABBREV output
- Changes default configuration to not use property
- attributes, which seem unpopular.
-
- ARP
- Extended API to show bnode scope and XML Namespaces.
- Discovered memory leak which has been present since
- the beginning. This is not fixed. Users of ARP and
- Jena in memory limited, or long-running applications,
- or reading lots of varied RDF/XML, should read the
- updated Javadoc for the package
- com.hp.hpl.jena.rdf.arp.
-
- Reasoner
- Small bug fixes (see below).
-
- Xerces
- Now requires Xerces 2.6.0 or better. The included jars are Xerces 2.6.1.
-
- Ontology API
- General bug fixes and improvements based on jena-dev feedback.
-
- The default document manager policy (etc/ont-policy.rdf) no longer
- re-directs imports of owl.owl and daml+oil.daml to a cached copy in
-    the 'vocabulary' directory. This is because the vocabulary directory
- is not included in jena.jar, and this default re-direction was causing
- problems in some applet or web service environments. The Jena 2.0
- behaviour can be restored by replacing ont-policy.rdf with ont-policy-test.rdf.
-
- Instance detection has been improved, with the side-effect that DAML
- ontologies using the DAML micro rule reasoner may now report that
- instances have rdf:type daml:Thing in addition to other types.
-
-Jena 2.1 changes from Jena 2.1-dev-3
- Minor bug fixes in OWL Syntax Checker.
- Streaming mode in OWL Syntax Checker.
- Documented ARP memory leak.
-
-Jena 2.1-dev-3
- Implements W3C RDF and OWL Proposed Recommendations
- OWL Syntax Checker - much faster, new API (error msgs still being worked on)
- RDF/XML-ABBREV output various bug fixes
- RDF/XML-ABBREV msg added requesting bug reports on rdf:parseType="Collection"
- ARP new extended handler for scope of blank nodes and namespace handler
- ARP improved syntax error messages
- OWL Syntax checker prolog source included in download (see tools dir)
-
-Jena 2.1-dev-2:
- Developers' release to include recent bug fixes (notably
- handling of typed literals in validation rules).
- Do not use this version unless you need one or more bug
- fixes not in Jena 2.0.
-
-Jena 2.1-dev-1:
- This is a developers' release, particularly
- intended for users of the OWL Syntax Checker.
- Most users should continue to use Jena 2.0.
- For changes, see directly below.
- Documentation may not be up to date.
- Do not use this version unless:
- - you need a conformant OWL Syntax Checker
- - you need one or more of the bug fixes not in
- Jena 2.0
-
-
-RDF API:
-o Bug fixes:
- - fixed issue with typed literals sometimes treating lexically different
-   but sameValueAs resources as equal.
-
- - fixed bug in Model::remove(StmtIterator) at the expense of manifesting
- the iterator into a List
-
- - fixed bug in .remove on StmtIterators returned from listStatements()
- on a memory model
-
-o ModelFactory improvements:
- - ModelSpecs can now be identified by URI as well as Model values
- - ModelRDBMaker.createModel will now return a ModelRDB rather than plain Model
-
-Reasoner subsystem:
-
-o Fixed delete bug in FORWARD_RETE configurations; removing a statement
-  should now remove the consequents of that statement.
-o Added a check for constructing one OWL reasoner instance layered on top
- of another because this can have a large performance impact to no benefit.
-o Added a "hide" primitive which marks nodes as hidden. When querying an
- inference model no triples containing hidden subject or object nodes will
- be included in the result iterator.
- Used this to hide class-prototype instances
-o Extended the comparison builtins (equal, le etc) to support comparison of
-  XSDDateTime instances. Many thanks to Bradley Schatz (Bradley@greystate.com)
-  for supplying the patches for this.
-o Extended OWL rules to include more explicit representation of XSD knowledge.
-o Various bug fixes in OWL rules (maxCardinality bug, hasValue in intersection
-  lists fixed, bug in someValuesFrom fixed, missing property subclass axioms).
-o Fixed bug in RETE engine which could loop when deleting non-deletable triples.
-o Fixed bug in LP engine which could lead to loss of variable bindings
- (manifested as "Internal error in LP reasoner: variable in triple result")
-o Extended is/notDTtype to check for ill-formed typed literals.
-
-OWL Syntax Checker
-o Now conforms with OWL Proposed Rec of December 2003
- - Performance much improved. There is
- about a one second delay on start-up.
- - Error messages still somewhat cryptic
- (Should be better in next release)
-
-RDF/XML-ABBREV output
-o Failed to fix bug concerning rdf:parseType="Collection"
- - added new message trying to generate sufficient user feedback
- on jena-dev to track down bug.
-o Fixed other bugs on bug list
-
-RDQL
-o Improved handling of character sets in qnames
-
-N3
-o Improved handling of character sets in qnames
-
-
-
-Graph query SPI [NB NOT visible at the model level]
-
-o replaced use of Graph for constraints by new Expression interface as
- part of ongoing query improvement.
diff --git a/graphite/Jena-2.6.4/copyright.txt b/graphite/Jena-2.6.4/copyright.txt
deleted file mode 100644
index f2790aa..0000000
--- a/graphite/Jena-2.6.4/copyright.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * (c) Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Hewlett-Packard Development Company, LP
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-This product includes software developed by the
-Apache Software Foundation (http://www.apache.org/).
diff --git a/graphite/Jena-2.6.4/lib/arq-2.8.7.jar b/graphite/Jena-2.6.4/lib/arq-2.8.7.jar
deleted file mode 100644
index 3040a99..0000000
Binary files a/graphite/Jena-2.6.4/lib/arq-2.8.7.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/arq-2.8.8.jar b/graphite/Jena-2.6.4/lib/arq-2.8.8.jar
deleted file mode 100644
index 280f028..0000000
Binary files a/graphite/Jena-2.6.4/lib/arq-2.8.8.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/icu4j-3.4.4.jar b/graphite/Jena-2.6.4/lib/icu4j-3.4.4.jar
deleted file mode 100644
index f5e8c16..0000000
Binary files a/graphite/Jena-2.6.4/lib/icu4j-3.4.4.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/iri-0.8.jar b/graphite/Jena-2.6.4/lib/iri-0.8.jar
deleted file mode 100644
index f096c68..0000000
Binary files a/graphite/Jena-2.6.4/lib/iri-0.8.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/jena-2.6.4.jar b/graphite/Jena-2.6.4/lib/jena-2.6.4.jar
deleted file mode 100644
index efc64a9..0000000
Binary files a/graphite/Jena-2.6.4/lib/jena-2.6.4.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/log4j-1.2.13.jar b/graphite/Jena-2.6.4/lib/log4j-1.2.13.jar
deleted file mode 100644
index dde9972..0000000
Binary files a/graphite/Jena-2.6.4/lib/log4j-1.2.13.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/slf4j-api-1.5.8.jar b/graphite/Jena-2.6.4/lib/slf4j-api-1.5.8.jar
deleted file mode 100644
index 20d1d37..0000000
Binary files a/graphite/Jena-2.6.4/lib/slf4j-api-1.5.8.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/slf4j-log4j12-1.5.8.jar b/graphite/Jena-2.6.4/lib/slf4j-log4j12-1.5.8.jar
deleted file mode 100644
index a47eee4..0000000
Binary files a/graphite/Jena-2.6.4/lib/slf4j-log4j12-1.5.8.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/stax-api-1.0.1.jar b/graphite/Jena-2.6.4/lib/stax-api-1.0.1.jar
deleted file mode 100644
index d9a1665..0000000
Binary files a/graphite/Jena-2.6.4/lib/stax-api-1.0.1.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/wstx-asl-3.2.9.jar b/graphite/Jena-2.6.4/lib/wstx-asl-3.2.9.jar
deleted file mode 100644
index ffdbd1f..0000000
Binary files a/graphite/Jena-2.6.4/lib/wstx-asl-3.2.9.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/lib/xercesImpl-2.7.1.jar b/graphite/Jena-2.6.4/lib/xercesImpl-2.7.1.jar
deleted file mode 100644
index 0b100e1..0000000
Binary files a/graphite/Jena-2.6.4/lib/xercesImpl-2.7.1.jar and /dev/null differ
diff --git a/graphite/Jena-2.6.4/readme.html b/graphite/Jena-2.6.4/readme.html
deleted file mode 100644
index 535cb7b..0000000
--- a/graphite/Jena-2.6.4/readme.html
+++ /dev/null
@@ -1,21 +0,0 @@
-
-  Jena README
-
-  Jena is a Java framework for writing Semantic Web applications.
-
diff --git a/graphite/LICENSE.txt b/graphite/LICENSE.txt
deleted file mode 100644
index aa21393..0000000
--- a/graphite/LICENSE.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-Copyright 2011 Ronan Klyne
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
diff --git a/graphite/__init__.py b/graphite/__init__.py
deleted file mode 100644
index f577b4c..0000000
--- a/graphite/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-
-from rdfgraph import Graph, Dataset, Endpoint, Config
diff --git a/graphite/config.ini b/graphite/config.ini
deleted file mode 100644
index b350593..0000000
--- a/graphite/config.ini
+++ /dev/null
@@ -1,17 +0,0 @@
-
-[config]
-# Path to Jena's libs directory.
-jena_libs = Jena-2.6.4/lib
-
-# The software tries to have a good guess if you set nothing here but you may
-# need to tinker.
-#
-# Windows
-#jvm_lib = bin\client\jvm.dll
-# Linux amd64
-#jvm_lib = jre/lib/amd64/server/libjvm.so
-# Linux x86
-#jvm_lib = jre/lib/x86/server/libjvm.so
-
-# Show the SPARQL queries being made
-# sparql_debug = 1
diff --git a/graphite/rdfgraph.py b/graphite/rdfgraph.py
deleted file mode 100644
index 397c58b..0000000
--- a/graphite/rdfgraph.py
+++ /dev/null
@@ -1,1747 +0,0 @@
-""" A hackers RDF query and manipulation tool.
-Ripped off from Chris Gutteridge's Graphite: http://graphite.ecs.soton.ac.uk/
-"""
-
-from __future__ import print_function
-
-# CONFIG! (finally)
-
-class Config(object):
- config_files = [
- 'config.ini',
- ]
- sparql_debug = False
- cache_dir = 'rdfgraph.cache'
-
- def __init__(self):
- self.load()
- def load(self):
- import os
- base_dir = os.path.dirname(__file__)
- work_dir = os.getcwd()
- import ConfigParser
- cp = ConfigParser.SafeConfigParser(defaults={
- 'jena_libs': 'jena/libs',
- 'jvm_lib': None,
- })
- cp.read(map(
- lambda name: os.path.join(base_dir, name),
- self.config_files,
- ))
-
- libs_cfg = cp.get('config', 'jena_libs')
- if libs_cfg:
- self.jena_libs = os.path.join(base_dir, libs_cfg)
- self.jena_libs = os.path.abspath(self.jena_libs)
- self.jena_libs += '/'
-
- jvm_cfg = cp.get('config', 'jvm_lib')
- if jvm_cfg:
- # Have a good guess with relative paths - probably JAVA_HOME relative
- java_base_dir = os.environ.get('JAVA_HOME', None) or base_dir
- self.jvm_file = os.path.join(java_base_dir, jvm_cfg)
- self.jvm_file = os.path.abspath(self.jvm_file)
- else:
- self.jvm_file = None # Guess later
-
- try:
- cp.get('config', 'sparql_debug')
- self.sparql_debug = True
- except: pass
- try:
- cache_dir = cp.get('config', 'cache_dir')
- except:
- cache_dir = self.cache_dir
- self.cache_dir = os.path.join(work_dir, cache_dir)
-
-Config = Config()
-
-# Some constants
-DEFAULT_NAMESPACES = {
- 'owl': 'http://www.w3.org/2002/07/owl#',
- 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
- 'xsd': 'http://www.w3.org/2001/XMLSchema#',
- 'yago': 'http://dbpedia.org/class/yago/',
- 'dbpedia': 'http://dbpedia.org/resource/',
- 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
- 'skos': 'http://www.w3.org/2004/02/skos/core#',
- 'foaf': 'http://xmlns.com/foaf/0.1/',
- 'void': 'http://rdfs.org/ns/void#',
- 'qb': 'http://purl.org/linked-data/cube#',
- 'dcterms': 'http://purl.org/dc/terms/',
- 'interval': 'http://reference.data.gov.uk/def/intervals/',
- 'org': 'http://www.w3.org/ns/org#',
- 'vcard': 'http://www.w3.org/2006/vcard/ns#',
- 'payment': 'http://reference.data.gov.uk/def/payment#',
- 'council': 'http://reference.data.gov.uk/def/council#',
- 'internal': 'http://www.epimorphics.com/vocabularies/spend/internal#',
-}
-RDFXML = 1001
-N3 = 1002
-NTRIPLE = 1003
-TURTLE = 1004
-HTML = 1005 # Will support RDFa eventually.
-
-# Some decorators
-def takes_list(f):
- "Parses a Resource iterator out of the tuple passed"
- def parse_list(self, tpl):
- for item in tpl:
- if getattr(item, 'isResourceList', False) or isinstance(item, list):
- for resource in item:
- yield resource
- elif getattr(item, 'is_resource', False):
- yield item
- elif isinstance(item, (str, unicode)):
- # Assume it's a URI. Maybe add some literal support later.
- yield self.resource(item)
- else:
- yield item
-
- def g(self, *t, **k):
- l = parse_list(self, t)
- return f(self, l, **k)
- return g
-def gives_list(f):
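-    "Wraps the iterable returned by f in a ResourceList."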
- def g(*t, **k):
- return ResourceList(f(*t, **k))
- return g
-def memoise(f):
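-    "Caches f's results per instance, keyed on the tuple of positional arguments."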
- memos = {}
- def g(self, *t):
- memo = memos.setdefault(self, {})
- if t in memo:
- return memo[t]
- r = f(self, *t)
- memo[t] = r
- return r
- return g
-
-class FileCache(object):
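-    "A simple on-disk cache: an index, marshalled to disk, maps keys to data files in a cache directory."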
- index_name = 'index.marshal'
- def __init__(self, path):
- import os
- self.dir = os.path.join(Config.cache_dir, path)
- self.index = {}
- if not os.path.isdir(self.dir):
- os.makedirs(self.dir)
- self.load_index(allow_error=True)
- def open(self, name, *t, **k):
- import os
- return open(os.path.join(self.dir, name), *t, **k)
- def has(self, name):
- return name in self.index
- __contains__ = has
- def get_path(self, name):
- return self.index[name]
- def get(self, name):
- if name not in self.index:
- raise KeyError(name)
- with self.open(self.index[name], 'rb') as f:
- return f.read().decode('utf-8')
- __getitem__ = get
- def set(self, name, data):
- fname = self.index.get(name, None)
- if fname is None:
- fname = self._new_name()
- self.index[name] = fname
- self.save_index()
- with self.open(fname, 'wb') as f:
- if isinstance(data, unicode):
- data = data.encode('utf-8')
- f.write(data)
- __setitem__ = set
- def _new_name(self):
- import os
- import tempfile
- fnum, fname = tempfile.mkstemp(prefix='d-', suffix='.rawdata', dir=self.dir)
- return fname
-
- def load_index(self, allow_error=True):
- try:
- f = self.open(self.index_name, 'rb')
- except:
- if not allow_error:
- raise
- return
- import marshal
- try:
- self.index = marshal.load(f)
- except:
- if not allow_error:
- raise
- f.close()
- def save_index(self):
- f = self.open(self.index_name, 'wb')
- try:
- import marshal
- marshal.dump(self.index, f)
- finally:
- f.close()
-
-class CacheFactory(object):
- caches = {}
- def __init__(self, klass=FileCache):
- self.klass = klass
- def get(self, name):
- import os
- if name not in self.caches:
- self.caches[name] = self.klass(os.path.join(Config.cache_dir, name))
- return self.caches[name]
- __getitem__ = get
-
-caches = CacheFactory()
-
-class Context(object):
- def __init__(self):
- self.stack = []
- def __enter__(self, *t):
- self.stack.append(t)
- def __exit__(self, x, y, z):
- self.stack.pop()
- def active(self):
- return bool(self.stack)
- def current(self):
- return self.stack[-1]
-
-NoAutoQuery = Context()
-
-class Graph(object):
- """Represents an RDF graph in memory.
- Provides methods to load data and query in a nice way.
- """
- is_graph = True
- web_cache = caches['web']
- def __init__(self, uri=None, namespaces=None, engine=None):
- if not engine:
- engine = self.create_default_engine()
- self.engine = engine
- self.graph = None
- # A log of which URIs have been loaded already, for efficiency
- self.loaded = {}
- if uri:
- self.load(uri)
- self.add_ns(DEFAULT_NAMESPACES)
- if namespaces:
- self.add_ns(namespaces)
-
- @classmethod
- def get_default_engine_class(cls):
- return getattr(Graph, '_default_graph_class', JenaGraph)
- @classmethod
- def use_jena(cls):
- Graph._default_graph_class = JenaGraph
- @classmethod
- def use_rdflib(cls):
- Graph._default_graph_class = RdflibGraph
-
- def create_default_engine(self):
- return self.get_default_engine_class()()
-
- @takes_list
- def read_uri(self, lst, allow_error=False, _cache=[], **k):
- reload = k.get('reload', False)
- assert lst, "Load what?"
- for datum in lst:
- assert getattr(datum, 'isURIResource', False), "Can't load {0!r}".format(datum)
- try:
- self._load_uri(datum.uri(), reload=reload, format=k.get('format', None))
- except:
- if not allow_error:
- raise
- return self
- load = read_uri
-
- def _sniff_format(self, data, type=None):
- if type and type not in [
- 'text/plain',
- 'application/octet-stream',
- ]:
- if type in [
- 'text/turtle',
- ]:
- return TURTLE
- elif type in [
- 'application/rdf+xml',
- 'text/xml',
- ]:
- return RDFXML
- elif type in [
- 'text/n3',
- ]:
- return N3
- all_data = data
- data = data[:2048]
- ldata = data.lower()
-        if ldata.find('<html') >= 0:
-            return HTML
-        if ldata.find('<body') >= 0:
-            return HTML
-        if ldata.find('<rdf') >= 0:
-            return RDFXML
- if ldata.find('@prefix') >= 0:
- return TURTLE
- if ldata.find('/rdf>') >= 0:
- return RDFXML
- if ldata.find(':rdf>') >= 0:
- return RDFXML
- return TURTLE
-
- def _load_uri(self, uri, **k):
- "Load data from the web into the web cache and return the new model."
- reload = k.get('reload', False)
- if 'format' in k:
- k['format'] = self._parse_rdf_format(k['format'])
- # Strip the fragment from this URI before caching it.
- assert isinstance(uri, (str, unicode)), uri
- import urlparse
- uri_key = ''.join(urlparse.urlparse(uri)[:5])
- if not reload and uri_key in self.loaded: return
- self.loaded[uri_key] = True
- # I preferred turtle here, but RDFXML seems more robust with dodgy input data.
- CACHE_FORMAT = RDFXML
- if uri in self.web_cache:
- try:
- self.import_uri('file:///'+self.web_cache.get_path(uri), format=CACHE_FORMAT)
- except:
- print("Error getting <"+uri+"> from cache")
- raise
- else:
- import urllib2
- r = urllib2.Request(
- uri,
- headers={
- 'accept': 'text/turtle; q=0.9, text/n3; q=0.8, application/rdf+xml; q=0.5'
- }
- )
- f = urllib2.urlopen(r)
- msg = f.info()
- data = f.read(1024)
- mime = msg.getheader("content-type")
- enc = msg.getheader("content-encoding", 'utf-8')
- format = self._sniff_format(data, type=mime)
- if format == HTML:
- raise RuntimeError("Got HTML data", uri, data, mime)
- data += f.read()
- data = data.decode(enc)
- self.engine.load_text(data, format)
-
- # Then write the data to the cache.
- g = Graph()
- g._read_formatted_text(data, format)
- data2 = g.to_string(format=CACHE_FORMAT)
- # TODO: optimise this out:
- # Prove that the data loads before writing it to disk.
- g.engine.load_text(data2, format=CACHE_FORMAT)
- self.web_cache[uri] = data2
-
- def file_uri(self, path):
- import urllib
- return 'file:'+urllib.pathname2url(path)
-
- def load_file(self, path, **k):
- if 'format' not in k:
- with open(path, 'rb') as f:
- data = f.read(1024)
- k['format'] = self._sniff_format(data)
- else:
- k['format'] = self._parse_rdf_format(k['format'])
- uri = self.file_uri(path)
- self.import_uri(uri, **k)
-
- def save_file(self, path, format='turtle'):
- format = self._parse_rdf_format(format)
- data = self.engine.to_string(format=format)
- with open(path, 'wb') as f:
- f.write(data)
-
- def import_uri(self, uri, **k):
- "Load data directly from a URI into the Jena model (uncached)"
- self.engine.load_uri(uri, **k)
-
- def _parse_rdf_format(self, format):
- if format is None: return None
- if not isinstance(format, str): return format
- f = format.lower()
- if f in [
- 'rdfxml',
- 'rdf/xml',
- 'xml',
- ]:
- return RDFXML
- elif f in [
- 'turtle',
- 'ttl',
- ]:
- return TURTLE
- elif f in [
- 'ntriples',
- 'n-triples',
- 'ntriple',
- ]:
- return NTRIPLE
- elif f in [
- 'n3',
- ]:
- return N3
- else:
- raise RuntimeError("bad format")
-
- def read_text(self, text, mime=None):
- format = self._sniff_format(text, type=mime)
- return self._read_formatted_text(text, format)
- def _read_formatted_text(self, text, format):
- if format == TURTLE:
- self.read_turtle(text)
- elif format == N3:
- self.read_n3(text)
- elif format == NTRIPLE:
- self.read_ntriples(text)
- elif format == RDFXML:
- self.read_rdfxml(text)
- else:
- raise RuntimeError("bad format", format)
-
- def read_rdfxml(self, text):
- self.engine.load_text(text, RDFXML)
- return self
- load_rdfxml = read_rdfxml
- load_RDFXML = load_rdfxml
-
- def read_turtle(self, text):
- self.engine.load_text(text, TURTLE)
- return self
- load_turtle = read_turtle
- load_ttl = load_turtle
- load_TTL = load_turtle
-
- def read_n3(self, text):
- self.engine.load_text(text, N3)
- return self
- load_N3 = read_n3
- load_n3 = load_N3
-
- def read_ntriples(self, text):
- self.engine.load_text(text, NTRIPLE)
- return self
- load_ntriple = read_ntriples
- load_ntriples = load_ntriple
- load_NTRIPLE = load_ntriple
-
- def _parse_uri(self, data):
- if getattr(data, 'is_node', False):
- return data
- if isinstance(data, Resource):
- if data.is_uri():
- return data.datum
- else:
- return None
- if not isinstance(data, (str, unicode)):
- return None
- return URINode(self._expand_uri(data))
-
- def _expand_uri(self, uri_in):
- uri = uri_in
- if getattr(uri, 'isURIResource', False):
- uri = uri.uri
- import urlparse
- tpl = urlparse.urlparse(uri)
- if tpl[0] and tpl[2] and len(filter(None, tpl)) == 2:
- # "dc:title"-ish
- return self.engine.expand_uri(uri)
- return uri
- expand_uri = _expand_uri
-
- def shrink_uri(self, uri):
- if getattr(uri, 'isURIResource', False):
- uri = uri.uri
- return unicode(self.engine.shrink_uri(uri))
-
- def _parse_subject(self, sub):
- if sub is None:
- return None
- attempt = self._parse_uri(sub)
- if attempt is not None:
- return attempt
- raise ValueError(sub)
-
- def _parse_object(self, obj):
- if obj is None:
- return None
- attempt = self._parse_uri(obj)
- if attempt is not None:
- return attempt
- if callable(getattr(obj, 'value', None)):
- return Literal(obj.value())
- raise ValueError(obj)
- return obj
-
- def _parse_property(self, prop):
- if prop is None:
- return None
- attempt = self._parse_uri(prop)
- if attempt is not None:
- return attempt
- raise ValueError(prop)
-
- def _format_as_html(self, data):
- # XXX: Not Implemented !!!
- return data
-
- def dump(self): # Graph
- resources = {}
- seen_resources = {}
- for res, _, typ in self.triples(None, 'rdf:type', None):
- if res.uri in seen_resources: continue
- seen_resources[res.uri] = True
- l = resources.setdefault(typ, [])
- l.append(res)
- import cgi
- def quote(s):
- return cgi.escape(unicode(s))
-        # Build a simple HTML listing: one block per rdf:type, then that type's resources.
-        s = '<div>'
-        for typ in resources.keys():
-            s += '<div>'
-            typ_label = self[typ]['rdf:label']
-            if typ_label:
-                s += '<b>'
-                s += quote(typ_label)
-                s += '</b>'
-            else:
-                s += 'Type:'
-            s += ' <a href="%s">(%s)</a>' % (
-                quote(self.expand_uri(typ)),
-                quote(self.shrink_uri(typ)),
-            )
-            for res in resources[typ]:
-                s += res.dump()
-            s += '</div>'
-        s += '</div>'
-        return s
-
- def to_string(self, **k):
- return self.engine.dump(**k)
-
- def dump_resources(self, res, extended=False):
- # Use this to fire a pre-load
- for r in res:
- self.triples(r.uri, None, None)
- return self._format_as_html(
- self.engine.dump_resources(res, extended=extended)
- )
-
- def has_triple(self, *t): # Graph
- "Returns True if triples(*t) would return any triples."
- # TODO: Optimise this! Should use 'has...' in the engine.
- for x in self.triples(*t):
- return True
- return False
-
- def set_triple(self, x, y, z):
- self.engine.set_triple(
- self._parse_subject(x),
- self._parse_property(y),
- self._parse_object(z),
- )
- return self
- add = set_triple
- def remove_triples(self, x, y, z):
- self.engine.remove_triples(
- self._parse_subject(x),
- self._parse_property(y),
- self._parse_object(z),
- )
- return self
- remove = remove_triples
-
- @gives_list
- def triples(self, x, y, z):
- triple_iter = self.engine.triples(
- self._parse_subject(x),
- self._parse_property(y),
- self._parse_object(z),
- )
- for sub, pred, ob in triple_iter:
- ob = Resource(self, ob)
- sub = Resource(self, sub)
- pred = Resource(self, pred)
-
- yield sub, pred, ob
-
- def sparql(self, query_text): # Graph
- return SparqlList(self._parse_sparql_result(self.engine.sparql(query_text)))
-
- def _parse_sparql_result(self, result_obj):
- for result in result_obj:
- output = {}
- for k, v in result.items():
- if v.is_uri:
- v = Resource(self, v)
- output[k] = v
- yield output
-
- def resource(self, uri):
- if getattr(uri, 'is_resource', False):
- return uri
- return Resource(self, self._parse_uri(uri))
- get = resource
- __getitem__ = resource
- def literal(self, thing):
- return Resource(self, Literal(thing))
-
- def add_ns(self, *t, **k):
- return self.add_namespaces(*t, **k)
-
- def add_namespaces(self, namespaces):
- for prefix, uri in namespaces.items():
- self.engine.add_namespace(prefix, uri)
-
- def prefixes(self):
- return self.engine.namespaces()
- namespaces = prefixes
-
- def add_inference(self, type):
- self.engine.add_inference(type)
- return self
-
- @gives_list
- @takes_list
- def all_of_type(self, types):
- for type in types:
- for x, y, z in self.triples(None, 'rdf:type', type):
- yield x
-
- @gives_list
- def all_types(self):
- seen = {}
- for x, y, z in self.triples(None, 'rdf:type', None):
- if z.value() in seen: continue
- seen[z.value()] = True
- yield z
-
-
-def no_auto_query(f):
- def g(*t, **k):
- with NoAutoQuery:
- return f(*t, **k)
- return g
-class SparqlStats(object):
- """This is a non functional stub class.
-
- Implementors could store stats on endpoint for deciding whether to send it a
- particular query.
- """
- def __init__(self, uri, graph):
- self.uri = uri
- self.graph = graph
-
- def use_for_triple(self, triple):
- return True
-
- def use_for_query(self, query):
- return True
-
-
-class Reiterable(object):
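-    "Wraps an iterable and records the items it yields, so it can be iterated more than once."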
- def __init__(self, iterable):
- self.iterable = iterable
- self.iter_history = []
-
- def __iter__(self):
- i = 0
- while i < len(self.iter_history):
- yield self.iter_history[i]
- i += 1
- if self.iterable:
- for x in self.iterable:
- self.iter_history.append(x)
- yield x
- self.iterable = None
-
- def __len__(self):
- i = 0
- for _ in self:
- i += 1
- return i
-
- def __repr__(self):
- return "["+ ", ".join(map(repr, self)) +"]"
-
- def __str__(self):
- return "["+ ", ".join(map(str, self)) +"]"
-
-class ResourceList(Reiterable):
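-    "A re-iterable list of Resources with helpers for bulk property access, sorting and set operations."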
- isResourceList = True
-
- def first(self):
- for x in self:
- return x
-
- def map(self, name, *t, **k):
- result = []
- for r in self:
- result.append(getattr(r, name)(*t, **k))
- return result
- def map_concat(self, name, *t, **k):
- result = []
- for l in self.map(name, *t, **k):
- result.extend(l)
- return result
-
- def all(self, prop):
- return ResourceList(self.map_concat('all', prop))
-
- def has(self, prop):
- for b in self.map('has', prop):
- if b:
- return True
- return False
-
- def get(self, prop):
- return ResourceList(self.map('get', prop))
- __getitem__ = get
-
- def load(self):
- self.map('load')
- return self
-
- def load_same_as(self):
- self.map('load_same_as')
- return self
-
- def sort(self, prop):
- lst = []
- for x in self:
- key = x.get(prop)
- lst.append((key, x))
- lst.sort()
- return ResourceList([y for x, y in lst])
-
- def join(self, sep=''):
- return sep.join(map(str, self))
-
- # Set functions
- @gives_list
- @takes_list
- def add(self, others):
- for x in others:
- yield x
- for x in self:
- yield x
- union = add
-
- @gives_list
- @takes_list
- def remove(self, others):
- dct = dict.fromkeys(others)
- for x in self:
- if x not in dct:
- yield x
- intersection = remove
-
-
-class Resource(object):
- isResource = True
- is_resource = True
-
- def __init__(self, graph, datum):
- if getattr(datum, 'is_resource', False):
- datum = datum._get_raw_datum()
- assert datum.is_node, datum
- self.graph = graph
- self.datum = datum
- self.same_as_resources = []
- if datum.is_uri:
- self._uri = unicode(datum)
-
- def _get_raw_datum(self):
- return self.datum
-
- def _all_resources(self):
- return [self]
-
- def __eq__(self, other):
- if getattr(other, 'is_resource', False):
- other = other.datum
- return self.datum == other
-
- def is_literal(self):
- return self.datum.is_literal
- def is_uri(self):
- return self.datum.is_uri
- def is_blank(self):
- return self.datum.is_blank
- def __nonzero__(self):
- return not self.is_blank()
- def __str__(self):
- return unicode(self.datum)
- def __repr__(self):
- return "Resource(" + repr(self.datum) + ")"
- def __cmp__(self, other):
- return cmp(self.datum, other.datum)
- dump = __str__
-
- def uri(self):
- assert self.is_uri(), self
- return self.datum.value()
-
- def value(self):
- if self.is_uri():
- return self._uri
- elif self.is_blank():
- return None
- else:
- # Literal
- return self.datum.value()
-
- def label(self):
- lbl = self.get(
- "skos:prefLabel",
- "rdfs:label",
- "foaf:name",
- "dct:title",
- "dc:title",
- "sioc:name",
- )
- if lbl:
- return str(lbl)
- return lbl
-
- def has_label(self):
- return bool(self.label())
-
- isURIResource = True
-
- def __hash__(self):
- return hash(self.datum)
-
- def _all_resources(self):
- return [self] + self.same_as_resources
-
- def properties(self):
- seen = {}
- for y, z in self.property_values():
- if y not in seen:
- seen[y] = True
- yield y
-
- def inverse_properties(self):
- seen = {}
- for y, z in self.inverse_property_values():
- if y not in seen:
- seen[y] = True
- yield y
-
- def property_values(self):
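- # Yields (predicate, object) pairs for this resource and for any
- # resources merged in via load_same_as().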
- for res in self._all_resources():
- for x, y, z in self.graph.triples(res._get_raw_datum(), None, None):
- yield y, z
-
- def inverse_property_values(self):
- for res in self._all_resources():
- for x, y, z in self.graph.triples(None, None, res._get_raw_datum()):
- yield y, x
-
- def get(self, *props):
- "Get a property"
- for prop in props:
- for x in self.all(prop):
- return x
- return None
- __getitem__ = get
-
- def add(self, prop, obj):
- if not getattr(obj, 'is_resource', False):
- obj = self.graph.literal(obj)
- self.graph.add(self, prop, obj)
- return self
- def set(self, prop, obj):
- if not getattr(obj, 'is_resource', False):
- obj = self.graph.literal(obj)
- self.graph.remove(self, prop, None)
- self.graph.add(self, prop, obj)
- return self
- __setitem__ = set
-
- def type(self):
- return self['rdf:type']
-
- def _parse_prop(self, prop):
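- # A leading '-' (e.g. '-owl:sameAs') selects the inverse direction: the
- # resource is matched as the object of the triple rather than the subject.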
- invert = False
- assert isinstance(prop, (str, unicode))
- if prop[0] == '-':
- invert, prop = True, prop[1:]
- prop = self.graph._parse_property(prop)
- return prop, invert
-
- def all(self, prop):
- "Get a list of properties"
- prop, invert = self._parse_prop(prop)
- if invert:
- for x, y, z in self.graph.triples(None, prop, self._get_raw_datum()):
- yield x
- else:
- for x, y, z in self.graph.triples(self._get_raw_datum(), prop, None):
- yield z
-
- def has(self, prop):
- "Returns True iff the resource has a value for this property"
- prop, invert = self._parse_prop(prop)
- if invert:
- return self.graph.has_triple(None, prop, self._get_raw_datum())
- else:
- return self.graph.has_triple(self._get_raw_datum(), prop, None)
-
- def load(self): # URIResource
- self.graph.load(self._uri, allow_error=True)
- return self
-
- def load_same_as(self): # URIResource
- for i in [
- self.all('owl:sameAs'),
- self.all('-owl:sameAs'),
- ]:
- for other in i:
- other = Resource(self.graph, other)
- if other not in self.same_as_resources:
- self.same_as_resources.append(other)
- other.load()
- return self
-
- def to_string(self, extended=True): # Resource
- return self.graph.dump_resources(self._all_resources(), extended=extended)
-
- def short_html(self): # URIResource
- import cgi
- import urllib
- uri = self._uri
- short_uri = self.shrink_uri()
- return '<a href="%s">%s</a>' % (
- cgi.escape(urllib.quote(uri)),
- cgi.escape(short_uri),
- )
-
- def dump(self, extended=True): # URIResource
- import cgi
- def quote(s):
- return cgi.escape(unicode(s))
- def format(v):
- if callable(getattr(v, 'short_html', None)):
- return v.short_html()
- return quote(v)
- s = '<div>'
- if self.has('foaf:name'):
- s += '<h2>' + quote(self['foaf:name']) + '</h2>'
- s += '<a href="%s">%s</a>' % (
- quote(self._uri),
- quote(self._uri),
- )
- s += '<br/>'
- for prop in self.properties():
- s += "→<a href='%s' title='%s'>%s</a>→ %s <br/>\n" % (
- quote(prop),
- quote(prop),
- self.graph.shrink_uri(prop),
- ', '.join(map(format, self.all(str(prop)))),
- )
- if extended:
- for prop in self.inverse_properties():
- s += "← is <a href='%s' title='%s'>%s</a> of ← %s <br/>\n" % (
- quote(prop),
- quote(prop),
- self.graph.shrink_uri(prop),
- ', '.join(map(format, self.all('-' + str(prop)))),
- )
- s += '</div>'
- return s
-
- def shrink_uri(self):
- return self.graph.shrink_uri(self._uri)
- def expand_uri(self):
- return self.graph.expand_uri(self._uri)
-
- def in_ns(self, ns):
- uri = self.expand_uri()
- prefixes = self.graph.prefixes()
- ns = prefixes.get(ns, ns)
- return uri.startswith(ns)
-
- def get_ns(self):
- uri = self.expand_uri()
- for pre, full in self.graph.prefixes().items():
- if uri.startswith(full):
- return full
- p = uri.rfind('#')
- if p >= 0:
- return uri[:p]
- return uri[:uri.rfind('/')]
-
-
-#
-# The SPARQL/Endpoint/Dataset bit
-#
-
-class SparqlList(Reiterable):
- def _get(self, var):
- for dct in self:
- yield dct[var]
- def get(self, var):
- return ResourceList(self._get(var))
- __getitem__ = get
-
- def count(self, var):
- total = 0
- for dct in self:
- total += dct[var]
- return total
-
-class Endpoint(object):
- def __init__(self, uri, dataset):
- self.uri = uri
- assert getattr(dataset, 'is_dataset', False), dataset
- self.dataset = dataset
- self.graph = dataset.create_graph()
- self.engine = self.create_engine()
-
- def create_engine(self):
- return Jena()
-
- def select(self, query):
- "Make a SPARQL SELECT and traverse the results"
- return SparqlList(self.graph._parse_sparql_result(
- self.engine.load_sparql(self.uri, query)
- ))
-
- def construct(self, graph, query):
- "Load data into memory from a SPARQL CONSTRUCT"
- graph.engine.import_sparql(self.uri, query)
- return self
-
-
-class Dataset(object):
- """Extends Graph with a set of SPARQL endpoints and hooks that load
- data from these endpoints as you query through the Graph interface.
-
- The intent is to facilitate gathering exactly the data you want from
- anywhere it happens to be, and to make it as easy to interrogate as possible.
-
- And I shall call this module... Magic Sparqls!"""
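-
- # A minimal usage sketch (the endpoint URI is hypothetical):
- #
- # ds = Dataset(endpoint='http://example.org/sparql')
- # for s, p, o in ds.triples(None, 'rdfs:label', None):
- # print s, o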
- is_dataset = True
- stats_class = SparqlStats
- graph_class = Graph
-
- def __init__(self, endpoint=None, uri=None, namespaces=None):
- self.endpoints = {}
- self.endpoint_stats = {}
- self.graphs = []
- self._triple_query_cache = {}
- self.namespaces = namespaces or {}
- if endpoint:
- self.add_endpoints(endpoint)
- # The data cache is a sort of default graph.
- self.data_cache = self.create_graph(uri=uri, namespaces=namespaces)
-
- def get(self, *t, **k):
- return self.data_cache.get(*t, **k)
- resource = get
- __getitem__ = get
-
- def endpoint(self, uri):
- if uri in self.endpoints:
- return self.endpoints[uri]
- return Endpoint(uri, self)
-
- @takes_list
- def add_endpoint(self, endpoints):
- for resource in endpoints:
- uri = resource.uri()
- self.endpoints[uri] = Endpoint(uri, self)
- self.endpoint_stats[uri] = self.stats_class(uri, self)
- return self
- add_endpoints = add_endpoint
-
- def add_graph(self, graph):
- self.graphs.append(graph)
-
- def create_graph(self, *t, **k):
- if 'namespaces' not in k:
- k['namespaces'] = self.namespaces
- g = self.graph_class(*t, **k)
- self.add_graph(g)
- return g
-
- def _in_cache(self, endpoint, triple):
- "Do a wild-card safe test for caching"
- tests = []
- cache = self._triple_query_cache.get(endpoint, None)
- if not cache:
- return False
- x, y, z = triple
- xs = [None]
- ys = [None]
- zs = [None]
- if x is not None: xs.append(x)
- if y is not None: ys.append(y)
- if z is not None: zs.append(z)
- for x in xs:
- for y in ys:
- for z in zs:
- t = (x, y, z)
- if t in cache:
- return True
- return False
-
- def select_endpoints(self, *t):
- if NoAutoQuery.active():
- return []
- all_endpoints = self.endpoints.keys()
- endpoints = []
- if len(t) == 0:
- raise RuntimeError("select for what?")
- elif len(t) == 3:
- for ep, stats in self.endpoint_stats.items():
- if not self._in_cache(ep, t):
- if stats.use_for_triple(t):
- endpoints.append(ep)
- else:
- for ep, stats in self.endpoint_stats.items():
- if stats.use_for_query(t[0]):
- endpoints.append(ep)
- return endpoints
-
- def _make_query(self, text):
- query = ""
- for prefix, uri in self.prefixes().items():
- query += "PREFIX " + prefix + ": <" + uri + ">\n"
- query += text
- return query
-
- def _make_query_value(self, v, uri=False):
- if uri or getattr(v, 'is_uri', False) or getattr(v, 'isURIResource', False):
- return "<"+unicode(v)+">"
- else:
- return '"'+unicode(v)+'"'
-
- def triples(self, x, y, z):
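- # Auto-query: for each endpoint that might answer this pattern, issue a
- # CONSTRUCT mirroring it, pull the results into the local cache graph,
- # then answer the pattern from the local graphs.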
- x = self._parse_subject(x)
- y = self._parse_property(y)
- z = self._parse_object(z)
- endpoints = list(self.select_endpoints(x, y, z))
- if endpoints:
- if x:
- qx = self._make_query_value(x, uri=True)
- else:
- qx = '?x'
- if y:
- qy = self._make_query_value(y, uri=True)
- else:
- qy = '?y'
- if z:
- qz = self._make_query_value(z)
- else:
- qz = '?z'
- query = self._make_query("""
- CONSTRUCT { %(x)s %(y)s %(z)s }
- WHERE { %(x)s %(y)s %(z)s }
- """ % {
- 'x': qx,
- 'y': qy,
- 'z': qz,
- })
- for uri in endpoints:
- if Config.sparql_debug:
- print("Auto-query: {0}".format(uri))
- print(query)
- self._triple_query_cache.setdefault(uri, {})[(x, y, z)] = True
- self.endpoint(uri).construct(self.data_cache, query)
- #
- # TODO: Aggregate results from all graphs here.
- #
- import itertools
- iters = []
- for g in self.graphs:
- iters.append(g.triples(x, y, z))
- return ResourceList(itertools.chain(*iters))
- #
-
- def sparql(self, query, *t, **k):
- # TODO: Detect grouping and handle count aggregation differently
- iter = self._sparql(query, *t, **k)
- return SparqlList(iter)
- def _sparql(self, query):
- for g in self.graphs:
- for x in g.sparql(query):
- yield x
- for uri in self.select_endpoints(query):
- for x in self.endpoint(uri).select(query):
- yield x
-
- def _load_all_sparql(self, query):
- for uri in self.select_endpoints(query):
- raise NotImplementedError("Implement Endpoint class for 'read_sparql'")
- for x in self.endpoint(uri).select(query):
- yield x
-
- # SPARQL query methods
- def describe(self, query):
- self._load_all_sparql("describe "+query)
- return self
- def construct(self, query):
- self._load_all_sparql("construct "+query)
- return self
-
- def to_string(self, *t, **k):
- # TODO: Dump all local graphs and the local caches of all Endpoints.
- return self.data_cache.to_string(*t, **k)
-
-
-#
-# The JPype/Jena bit.
-#
-
-
-class Engine(object):
- """Defines an interface for an RDF triple store and query engine.
- """
- def sparql(self, query_text):
- raise NotImplementedError("SPARQL querying not supported by this engine")
-
- def triples(self, subject, predicate, object):
- raise NotImplementedError("Select triples from the store")
-
- def load_uri(self, uri, format=TURTLE):
- raise NotImplementedError("Load RDF from a URI into the store")
-
- def load_text(self, text, format=TURTLE, encoding='utf-8'):
- raise NotImplementedError("Load RDF from a string into the store")
-
- def dump(self, format=TURTLE):
- return self.to_string(format=format)
-
- def to_string(self, format=TURTLE):
- raise NotImplementedError("Dump RDF as a string")
-
- def expand_uri(self, uri):
- raise NotImplementedError("Expand a URI's shorthand prefix")
-
- def add_namespace(self, prefix, uri):
- raise NotImplementedError("Register a namespace and it's prefix")
-
-import warnings
-warnings.filterwarnings("ignore", message="the sets module is deprecated")
-
-try:
- import jpype
- del jpype
-except ImportError:
- raise RuntimeError("Install JPype: http://sourceforge.net/projects/jpype/")
-
-from jpype import startJVM, shutdownJVM, ByteArrayCustomizer, \
- CharArrayCustomizer, ConversionConfig, ConversionConfigClass, JArray, \
- JBoolean, JByte, JChar, JClass, JClassUtil, JDouble, JException, \
- JFloat, JInt, JIterator, JLong, JObject, JPackage, JProxy, JString, \
- JavaException, java
-_jvm_running = False
-def runJVM():
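- # Start a single JVM with the bundled Jena jars on the classpath;
- # subsequent calls are no-ops.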
- global _jvm_running
- if _jvm_running:
- return
- jvm_args = [
- # Be a bit more reasonable with Java memory
- '-XX:MaxHeapFreeRatio=30',
- '-XX:MinHeapFreeRatio=10',
- ]
- import os
-
- if os.name == 'nt':
- cp_sep = ';'
- else:
- cp_sep = ':'
-
- java_classpath = [
- Config.jena_libs+'jena-2.6.4.jar',
- Config.jena_libs+'log4j-1.2.13.jar',
- Config.jena_libs+'arq-2.8.8.jar',
- Config.jena_libs+'slf4j-api-1.5.8.jar',
- Config.jena_libs+'slf4j-log4j12-1.5.8.jar',
- Config.jena_libs+'xercesImpl-2.7.1.jar',
- Config.jena_libs+'iri-0.8.jar',
- Config.jena_libs+'icu4j-3.4.4.jar',
- Config.jena_libs+'stax-api-1.0.1.jar',
- ]
- jvm_file = Config.jvm_file
- if not jvm_file:
- import jpype
- jvm_file = jpype.getDefaultJVMPath()
- if not jvm_file:
- home = os.environ.get('JAVA_HOME', '')
- if os.name == 'nt':
- jvm_file = os.path.join(home, 'bin','client','jvm.dll')
- else:
- jvm_file = os.path.join(home, 'jre', 'lib', 'amd64', 'server', 'libjvm.so')
-
- if java_classpath:
- jvm_args.append("-Djava.class.path=" + cp_sep.join(
- map(os.path.abspath, java_classpath))
- )
-
-
- startJVM(jvm_file, *jvm_args)
- _jvm_running = True
-
-class Node(object):
- "Represents a graph node to the engine"
- is_node = True
- is_blank = False
- is_uri = False
- is_literal = False
- def __init__(self, datum, **k):
- self.datum = datum
- self.init(**k)
- assert self.check(), datum
- def init(self):
- pass
-
- def __str__(self):
- return unicode(self.datum)
- def __repr__(self):
- return self.__class__.__name__+'('+repr(self.datum)+')'
-
- def __eq__(self, other):
- if getattr(other, 'is_node', False):
- if self.__class__ is not other.__class__:
- return False
- other = other.datum
- return self.datum == other
-
- def check(self):
- return True
-
-class URINode(Node):
- is_uri = True
- def value(self):
- if isinstance(self.datum, unicode):
- return self.datum
- return unicode(self.datum, 'utf-8')
-
- def check(self):
- uri = self.datum
- assert isinstance(uri, (str, unicode)), (uri, type(uri))
- assert (type(uri) in (str, unicode)), (uri, type(uri))
- return True
-class Literal(Node):
- is_literal = True
- def value(self):
- return self.datum
- def init(self, datatype=None):
- self.datatype = datatype
-class Blank(Node):
- is_blank = True
- def value(self):
- return None
-
-class Jena(object):
- def __init__(self, debug=False):
- if debug:
- if callable(debug):
- self.debug = debug
- else:
- def debug(x):
- print(x)
- self.debug = debug
- runJVM()
-
- def debug(self, msg): pass
-
- def _parse_literal(self, lit):
- if isinstance(lit, JClass("com.hp.hpl.jena.rdf.model.Literal")):
- lit = lit.getValue()
- if isinstance(lit, java.lang.Integer):
- return Literal(lit.intValue())
- elif isinstance(lit, java.lang.String):
- return Literal(lit.toString())
- elif isinstance(lit, java.lang.Float):
- return Literal(lit.floatValue())
- elif isinstance(lit, java.lang.Boolean):
- return Literal(lit.booleanValue())
- # TODO: Add conversions for *all* RDF datatypes
- return Literal(lit)
-
- def _parse_resource(self, res):
- if res.isAnon():
- return Blank(res.getId())
- elif res.isLiteral():
- return self._parse_literal(res.asLiteral())
- elif res.isURIResource():
- return URINode(res.getURI())
-
- def _iter_sparql_results(self, qexec):
- try:
- jresults = qexec.execSelect() # ResultsSet
- while jresults.hasNext():
- result = {}
- soln = jresults.nextSolution() # QuerySolution
- for name in soln.varNames():
- try:
- v = soln.getResource(name) # Resource // Get a result variable - must be a resource
- if v:
- v = URINode(v.getURI())
- except:
- v = soln.getLiteral(name) # Literal // Get a result variable - must be a literal
- v = self._parse_literal(v)
- result[name] = v
- yield result
- finally:
- qexec.close()
-
- def load_sparql(self, endpoint, query):
- q_pkg = JPackage("com.hp.hpl.jena.query")
- qexec = q_pkg.QueryExecutionFactory.sparqlService(JString(endpoint), JString(query))
- return self._iter_sparql_results(qexec)
-
-
-class JenaGraph(Engine, Jena):
- _jena_pkg_name = 'com.hp.hpl.jena'
-
- def __init__(self, **k):
- super(JenaGraph, self).__init__(**k)
- self.jena_model = None
- self.get_model()
-
- def get_model(self):
- if not self.jena_model:
- klass = JClass(self._jena_pkg_name+'.rdf.model.ModelFactory')
- self.jena_model = klass.createDefaultModel()
- return self.jena_model
-
- def _new_submodel(self):
- model = JClass('com.hp.hpl.jena.rdf.model.ModelFactory').createDefaultModel()
- model = model.setNsPrefixes(self.jena_model.getNsPrefixMap())
- return model
-
- def add_inference(self, type):
- if type == 'schema':
- model = JClass(self._jena_pkg_name+'.rdf.model.ModelFactory') \
- .createRDFSModel(self.get_model())
- self.jena_model = model
- else:
- raise RuntimeError("Unknown inference type", type)
-
- def expand_uri(self, uri):
- return str(self.get_model().expandPrefix(JString(uri)))
-
- def shrink_uri(self, uri):
- return str(self.get_model().shortForm(JString(uri)))
-
- def _mk_resource(self, res):
- "Make this Subject thing suitable to pass to Jena"
- if res is None:
- return JObject(None,
- JPackage(self._jena_pkg_name).rdf.model.Resource,
- )
- assert getattr(res, 'is_node', False), (res, type(res))
-# assert res.is_uri, res # XXX: TODO: This breaks with blank nodes, and shouldn't
- uri = res.datum
- assert isinstance(uri, (unicode, str)), (uri, type(uri))
- return JObject(
- self.get_model().createResource(JString(uri)),
- JPackage(self._jena_pkg_name).rdf.model.Resource,
- )
-
- def _mk_property(self, uri):
- "Make this Property thing suitable to pass to Jena"
- if uri is None:
- return JObject(None,
- JPackage(self._jena_pkg_name).rdf.model.Property,
- )
- assert getattr(uri, 'is_node', False), uri
- assert uri.is_uri, uri
- uri = uri.datum
- assert isinstance(uri, (unicode, str)), (uri, type(uri))
- return JObject(
- self.get_model().createProperty(JString(uri)),
- JPackage(self._jena_pkg_name).rdf.model.Property,
- )
-
- def _mk_object(self, obj):
- "Make this Object thing suitable to pass to Jena"
- if obj is None:
- return JObject(
- None,
- JPackage(self._jena_pkg_name).rdf.model.RDFNode,
- )
- assert getattr(obj, 'is_node', False), obj
- if obj.is_uri:
- return JObject(
- self.get_model().createResource(obj.datum),
- JPackage(self._jena_pkg_name).rdf.model.RDFNode,
- )
- elif obj.is_blank:
- return obj.datum
- else:
- value = obj.value()
- if isinstance(value, (str, unicode)):
- value = JString(value)
- return JObject(
- self.get_model().createTypedLiteral(value),
- JPackage(self._jena_pkg_name).rdf.model.RDFNode,
- )
-
- def as_node(self, obj):
- return JObject(
- self.get_model().createResource(obj.uri),
- JPackage(self._jena_pkg_name).rdf.model.RDFNode,
- )
-
- def get_jena_format(self, format):
- if isinstance(format, str):
- return format
- if format == TURTLE:
- format = "TTL"
- elif format == N3:
- format = "N3"
- elif format == NTRIPLE:
- format = "N-TRIPLE"
- elif format == RDFXML or format is None:
- format = "RDF/XML"
- else:
- raise RuntimeError("bad format", format)
- return format
-
- def load_uri(self, uri, format=None, allow_error=False):
- self.debug("JENA load "+uri)
- format = self.get_jena_format(format)
- jena = self.get_model()
- try:
- jena = jena.read(uri, format)
- except:
- if not allow_error: raise
- else:
- self.jena_model = jena
-
- def load_text(self, text, format=TURTLE, encoding='utf-8'):
- format = self.get_jena_format(format)
- self.debug("JENA load text "+format)
- jena = self.get_model()
- uri = "tag:string-input"
- if not isinstance(text, unicode):
- text = unicode(text, encoding)
- jstr = JString(text)
- input = JClass('java.io.StringReader')(jstr)
- jena = jena.read(input, uri, format)
- self.jena_model = jena
-
- def import_sparql(self, endpoint, query):
- q_pkg = JPackage("com.hp.hpl.jena.query")
- qexec = q_pkg.QueryExecutionFactory.sparqlService(JString(endpoint), JString(query))
- qexec.execConstruct(self.jena_model)
-
- def has_triple(self, x, y, z):
- self.debug(' '.join(["JENA has_triple ", repr(x), repr(y), repr(z)]))
- jena = self.get_model()
- sub = self._mk_resource(x)
- pred = self._mk_property(y)
- ob = self._mk_object(z)
- return bool(jena.contains(sub, pred, ob))
-
- def set_triple(self, x, y, z):
- self.debug(' '.join(["JENA add_triple ", repr(x), repr(y), repr(z)]))
- jena = self.get_model()
- sub = self._mk_resource(x)
- pred = self._mk_property(y)
- ob = self._mk_object(z)
- stmt = jena.createStatement(
- sub,
- pred,
- ob,
- )
- jena.add(stmt)
-
- def remove_triples(self, x, y, z):
- self.debug(' '.join(["JENA remove_triples ", repr(x), repr(y), repr(z)]))
- jena = self.get_model()
- sub = self._mk_resource(x)
- pred = self._mk_property(y)
- ob = self._mk_object(z)
- jena.removeAll(sub, pred, ob)
-
- def triples(self, x, y, z):
- self.debug(' '.join(["JENA triples ", repr(x), repr(y), repr(z)]))
- jena = self.get_model()
- sub = self._mk_resource(x)
- pred = self._mk_property(y)
- ob = self._mk_object(z)
-
- for stmt in jena.listStatements(
- sub,
- pred,
- ob,
- ):
- a = self._parse_resource(stmt.getSubject())
- assert a, (a, stmt)
- b = self._parse_resource(stmt.getPredicate())
- assert b, (b, stmt)
- c = self._parse_resource(stmt.getObject())
- assert c, (c, stmt)
- yield a, b, c
-
- def _dump_model(self, model, format="TTL"):
- out = JPackage('java').io.StringWriter()
- model.write(out, format)
- return unicode.encode(out.toString(), 'utf-8')
-
- def dump_resources(self, resources, format="TTL", extended=False):
- model = self._new_submodel()
- for res in resources:
- res = res.datum
- model.add(self.get_model().listStatements(
- self._mk_resource(res),
- self._mk_property(None),
- self._mk_object(None),
- ))
- if extended:
- model.add(self.get_model().listStatements(
- self._mk_resource(None),
- self._mk_property(None),
- self._mk_object(res),
- ))
- return self._dump_model(model)
-
- def to_string(self, format="TTL"):
- return self._dump_model(self.get_model(), self.get_jena_format(format))
-
- def dump(self, *t, **k):
- return self.to_string(*t, **k)
-
- def add_namespace(self, prefix, uri):
- self.get_model().setNsPrefix(prefix, uri)
-
- def namespaces(self):
- ns_dict = {}
- for prefix in self.get_model().getNsPrefixMap().entrySet():
- ns_dict[str(prefix.getKey())] = str(prefix.getValue())
- return ns_dict
-
- def sparql(self, query_text): # JenaGraph
- q_pkg = JPackage("com.hp.hpl.jena.query")
- model = self.get_model()
- query = q_pkg.QueryFactory.create(query_text)
- qexec = q_pkg.QueryExecutionFactory.create(query, model)
- return self._iter_sparql_results(qexec)
-
-
-class RdflibGraph(Engine, Jena):
- """Defines a mechanism for accessing a triple store in rdflib.
- """
- def __init__(self, **k):
- super(RdflibGraph, self).__init__(**k)
- import rdflib
- import rdfextras
- self.graph = rdflib.Graph()
-
- def sparql(self, query_text):
- qres = self.graph.query(query_text)
- qvars = qres.vars
- #raise RuntimeError(qres.bindings, query_text)
- for soln in qres.bindings:
- d = {}
- for v in qvars:
- try:
- value = soln[v]
- except KeyError:
- continue
- parsed_value = self._convert_rdflib_value(value)
- d[v.toPython()[1:]] = parsed_value
- #raise RuntimeError(d, soln)
- yield d
- #raise NotImplementedError, "SPARQL querying not supported by this engine"
-
- def _convert_data_value(self, val):
- if val is None: return None
- import rdflib
- if isinstance(val, URINode):
- return rdflib.URIRef(val.value())
- if isinstance(val, Literal):
- return rdflib.Literal(val.value())
- raise ValueError(val)
-
- def _convert_rdflib_value(self, val):
- if val is None:
- raise ValueError(val)
- import rdflib
- if isinstance(val, rdflib.URIRef):
- return URINode(val.toPython())
- if isinstance(val, rdflib.BNode):
- return Blank(str(val))
- if isinstance(val, rdflib.Literal):
- datatype = val.datatype
- if datatype is not None:
- datatype = datatype.toPython()
- return Literal(val.toPython(), datatype=datatype)
- raise ValueError(val)
-
- def set_triple(self, subject, predicate, object):
- self.graph.add((
- self._convert_data_value(subject),
- self._convert_data_value(predicate),
- self._convert_data_value(object),
- ))
-
- def remove_triples(self, subject, predicate, object):
- self.graph.remove((
- self._convert_data_value(subject),
- self._convert_data_value(predicate),
- self._convert_data_value(object),
- ))
-
- def _triples(self, subject, predicate, object):
- for s, p, o in self.graph.triples((
- self._convert_data_value(subject),
- self._convert_data_value(predicate),
- self._convert_data_value(object),
- )):
- yield (
- self._convert_rdflib_value(s),
- self._convert_rdflib_value(p),
- self._convert_rdflib_value(o),
- )
-
- def triples(self, subject, predicate, object):
- return list(self._triples(subject, predicate, object))
-
- def load_uri(self, uri, format=TURTLE):
- return self.graph.parse(uri, format=self._convert_format_id(format))
-
- def load_text(self, text, format=TURTLE, encoding='utf8'):
- #u_text = text.decode(encoding)
- u_text = text
- return self.graph.parse(data=u_text, format=self._convert_format_id(format))
-
- def _convert_format_id(self, format):
- if format in (TURTLE, N3):
- return 'n3'
- if format in (NTRIPLE, ):
- return 'n3'
- if format in (RDFXML, ):
- return 'xml'
- raise ValueError("Unhandled RDF format descriptor", format)
-
- def expand_uri(self, uri):
- if ':' not in uri:
- return uri
- prefix, rest = uri.split(':', 1)
- for p, r in self.graph.namespaces():
- if prefix == p:
- return r + rest
- return uri
-
- def add_namespace(self, prefix, uri):
- return self.graph.bind(prefix, uri, True)
-
- def to_string(self, format=TURTLE):
- return self.graph.serialize(format=self._convert_format_id(format))
-
-
-# Hook in a more sensible default ;-)
-Graph.use_rdflib()
-
diff --git a/setup.py b/setup.py
deleted file mode 100755
index 3724619..0000000
--- a/setup.py
+++ /dev/null
@@ -1,24 +0,0 @@
-
-from distutils.core import setup
-
-setup(
- name='python-graphite',
- version='0.2.2',
- author='Ronan Klyne',
- author_email='python-graphite@rklyne.net',
- packages=['graphite'],
- package_data={
- 'graphite': [
- 'config.ini',
- '*.txt',
- 'Jena-2.6.4/*.txt',
- 'Jena-2.6.4/*.html',
- 'Jena-2.6.4/lib/*.jar',
- ],
- },
- scripts=[],
- url='http://code.google.com/p/python-graphite/',
- license='LICENSE.txt',
- description='A flexible RDF hacking library built on JPype and Jena',
- long_description=open('README.txt').read(),
-)
diff --git a/test/test.py b/test/test.py
deleted file mode 100755
index 0505ce7..0000000
--- a/test/test.py
+++ /dev/null
@@ -1,351 +0,0 @@
-#!/usr/bin/env python
-"""
-These tests may seem a bit light. If they don't, then they should.
-
-Jena is doing *all* the hard work here - I'm just testing that it's all wired up properly.
-"""
-
-import unittest
-
-class Test(unittest.TestCase):
- verbose = False
-
- def new_graph(self, g=None):
- if g is None:
- g = rdfgraph.Graph()
- self.g = g
-
- def setUp(self):
- self.new_graph()
- def tearDown(self):
- self.g = None
-
- def file_data(self, data):
- return TempFile(data)
-
-SAMPLE_RDFXML = """
-
-
-
-
-
-"""
-SAMPLE_NTRIPLES = """
- .
-"""
-SAMPLE_N3 = """@prefix rdf: .
- rdf:type .
-"""
-SAMPLE_TTL = """
-
- a .
-"""
-SAMPLE_RDFXML_BNODE = """
-
-
-
-
-
-"""
-
-class TempFile(object):
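- # Context manager: writes `data` to a temporary file, yields the file
- # name, and deletes the file on exit.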
- def __init__(self, data):
- assert isinstance(data, str) # Only permit bytes here.
- self.data = data
-
- def __enter__(self):
- import tempfile
- tpl = tempfile.mkstemp()
- fn, self.name = tpl
- tf = open(self.name, 'wb')
- tf.write(self.data)
- tf.close()
- return self.name
-
- def __exit__(self, a,b,c):
- try:
- import os
- os.remove(self.name)
- except: pass
-
-class TestRead(Test):
- def test_read_XML(self):
- self.g.load_rdfxml(SAMPLE_RDFXML)
- self.assertEquals(
- self.g['tag:dummy1']['rdf:type'].uri(),
- 'tag:dummy2',
- self.g.to_string()
- )
-
- def test_read_XML_Bnode(self):
- self.g.load_rdfxml(SAMPLE_RDFXML_BNODE)
- for t in self.g.triples(None, None, None):
- if self.verbose: print str(t)
- if self.verbose: print repr(t)
- self.assertEquals(
- t[2].uri(),
- 'tag:dummy2',
- self.g.to_string()
- )
-
- def test_read_NTRIPLE(self):
- self.g.load_ntriples(SAMPLE_NTRIPLES)
- self.assertEquals(
- self.g['tag:dummy1']['rdf:type'].uri(),
- 'tag:dummy2',
- self.g.to_string()
- )
-
- def test_read_N3(self):
- self.g.load_N3(SAMPLE_N3)
- self.assertEquals(
- self.g['tag:dummy1']['rdf:type'].uri(),
- 'tag:dummy2',
- self.g.to_string()
- )
-
- def test_read_TTL(self):
- self.g.load_ttl(SAMPLE_TTL)
- self.assertEquals(
- self.g['tag:dummy1']['rdf:type'].uri(),
- 'tag:dummy2',
- self.g.to_string()
- )
-
-class TestGraph(Test):
- def setUp(self):
- super(TestGraph, self).setUp()
- self.g.load_ttl("""
-
- a .
-
- """)
-
- def test_get(self):
- r = self.g.get('tag:dummy1')
- self.failUnless(r)
- self.failUnless(getattr(r, 'isURIResource', False), r)
-
- def test_set(self, other=None):
- r = self.g.get('tag:dummy1')
- if other is None:
- other = self.g['tag:other']
- r['tag:p'] = other
- self.failUnless(r['tag:p'])
- self.assertEquals(r['tag:p'], other)
-
- def test_set_char(self):
- # Check single characters
- r = self.g.get('tag:dummy1')
- char = 'A'
- r['tag:char'] = char
- self.failUnless(r['tag:char'])
- self.assertEquals(r['tag:char'], char)
-
- def test_set_literal(self):
- self.test_set(other=2)
- self.test_set(other="Wibble")
-
-
-class TestURIResource(Test):
- def setUp(self):
- super(TestURIResource, self).setUp()
- self.g.load_ttl("""
-
- a ;
- ;
- ;
- ;
- [ a ].
- """)
- self.r = self.g.get('tag:dummy1')
- self.r.add('tag:int', 2)
- self.r.add('tag:int', 3)
- self.r['tag:str'] = "22"
- self.t = self.g.get('tag:dummy2')
-
- def test_get(self):
- self.assertEquals(self.r.get('rdf:type'), self.t)
- self.assertEquals(self.r['tag:str'], "22")
-
- def test_blank(self):
- b = self.r['tag:b']
- self.failIf(b, b)
- self.failUnless(b.is_blank(), b)
-
- def test_all(self):
- lst = list(self.r.all('tag:r1'))
- self.assertEquals(len(lst), 2)
- self.failUnless(self.g['tag:1'] in lst, lst)
- self.failUnless(self.g['tag:2'] in lst, lst)
-
- lst = list(self.r.all('tag:int'))
- self.assertEquals(len(lst), 2)
- for i in [2, 3]:
- self.failUnless(i in lst)
-
- def test_has(self):
- self.failUnless(self.r.has('tag:int'))
- self.failUnless(self.r.has('tag:r1'))
- self.failUnless(self.r.has('tag:b'))
- self.failIf(self.r['tag:r1'].has('tag:r1'))
-
- def test_value(self):
- s = "22"
- vr = self.r['tag:str']
- self.failUnless(vr)
- self.assertEquals(vr, s)
- self.failUnless(isinstance(vr, rdfgraph.Resource), `vr`)
- v = vr.value()
- self.failIf(isinstance(v, rdfgraph.Resource), `v`)
- self.failIf(isinstance(v, rdfgraph.Node), `v`)
- self.assertEquals(v, s)
-
- def test_uri(self):
- uri = 'tag:dummy1'
- r = self.r
- self.assertEquals(r, uri)
- self.failUnless(isinstance(r, rdfgraph.Resource), `r`)
- v = r.value()
- self.failIf(isinstance(v, rdfgraph.Resource), `v`)
- self.failIf(isinstance(v, rdfgraph.Node), `v`)
- self.assertEquals(v, uri)
-
-
-class TestResourceList(Test):
- def setUp(self):
- super(TestResourceList, self).setUp()
- self.r1 = self.g.get('tag:1')
- self.r2 = self.g.get('tag:2')
- self.failIf(self.r1 is self.r2)
- self.assertNotEquals(self.r1.datum, self.r2.datum)
- self.assertNotEquals(self.r1, self.r2)
- self.assertEquals(self.r1, self.g.get('tag:1'))
-
- def tearDown(self):
- super(TestResourceList, self).tearDown()
- self.r1 = None
- self.r2 = None
-
- def test_add(self):
- lst1 = rdfgraph.ResourceList([self.r1])
- lst2 = rdfgraph.ResourceList([self.r2])
- lst3 = lst1.add(lst2)
- self.failUnless(self.r1 in lst3, lst3)
- self.failUnless(self.r2 in lst3, lst3)
-
- def test_remove(self):
- lst1 = rdfgraph.ResourceList([self.r1, self.r2])
- lst2 = rdfgraph.ResourceList([self.r2])
- lst3 = lst1.remove(lst2)
- self.failUnless(self.r1 in lst3, list(lst3))
- self.failIf( self.r2 in lst3, list(lst3))
-
- def test_join(self):
- lst1 = rdfgraph.ResourceList([self.r1, self.r2])
- self.assertEquals(
- lst1.join(", "),
- "tag:1, tag:2",
- lst1
- )
-
-class TestUnicode(Test):
-
- u_lit = u'\x9c' # What iso-8859-1 calls '\xa3' - the British Pound sign.
- u_ttl = '''
- @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
- <tag:new_resource> <tag:new_relation> "\xc2\x9c"^^xsd:string .
- '''
- _rel = 'tag:new_relation'
- _res = 'tag:new_resource'
-
- def assert_loaded(self, g=None):
- if g is None:
- g = self.g
- ts = list(g.triples(None, None, None))
- self.assertEquals(len(ts), 1)
- self.assertEquals(self.u_lit, g[self._res][self._rel])
-
- def assert_not_loaded(self, g=None):
- if g is None:
- g = self.g
- ts = list(g.triples(None, None, None))
- self.assertEquals(len(ts), 0)
-
- def test_ttl_load(self):
- self.g.load_turtle(self.u_ttl)
- self.assert_loaded()
-
- def test_ttl_load_file(self, use_cache=False):
- import os
- self.assert_not_loaded()
- with self.file_data(self.u_ttl) as f:
- self.failUnless(os.path.isfile(f), f)
- with open(f, 'rb') as fp:
- self.failUnless(fp, f)
- if use_cache:
- uri = self.g.file_uri(f)
- self.g.load(uri)
- else:
- self.g.load_file(f)
- self.assert_loaded()
-
- def test_ttl_load_file_with_cache(self):
- self.test_ttl_load_file(True)
-
- def test_set_literal(self):
- r = self.g[self._res]
- r.set(self._rel, self.u_lit)
- self.assertEquals(self.u_lit, self.g[self._res][self._rel])
-
- def test_save_and_load(self):
- import tempfile
- fno, name = tempfile.mkstemp()
- self.g.load_turtle(self.u_ttl)
- self.assert_loaded()
- self.g.save_file(name)
-
- # The test of save is whether we can load it or not.
- self.new_graph()
- self.assert_not_loaded()
- self.g.load_file(name)
- self.assert_loaded()
-
-
-class TestSparql(Test):
- def setUp(self):
- super(TestSparql, self).setUp()
- self.g.load_ttl("""
-
- a .
- """)
-
- def test_select(self):
- results = self.g.sparql("select ?s ?p ?o where {?s ?p ?o}")
- self.failUnless(results)
- for var, expected in [
- ('s', 'tag:dummy1'),
- ('p', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
- ('o', 'tag:dummy2'),
- ]:
- lst = list(results[var])
- self.assertEquals(len(lst), 1)
- self.assertEquals(lst[0], expected)
-
-
-if __name__ == '__main__':
- # A bit of bootstrap to make sure we test the right stuff
- import sys
- import os
- mod_path = os.path.join(os.path.dirname(__file__), os.pardir)
- mod_path = os.path.abspath(mod_path)
-
- print "Testing in", mod_path
- sys.path.insert(0, mod_path)
-
- import graphite.rdfgraph as rdfgraph
- globals()['rdfgraph'] = rdfgraph
-
- # Kick off the tests
- unittest.main(argv=sys.argv)
-