From d831833df109c98c6c104f3cf92d201e3e46f026 Mon Sep 17 00:00:00 2001 From: kgpayne Date: Sun, 7 Feb 2021 10:27:15 +0000 Subject: [PATCH 1/2] Added new entities and a to_dict() methods. (#1) * Added new relation entity and to_dict() methods. * More tidying. --- pyproject.toml | 3 + tableaudocumentapi/__init__.py | 5 +- tableaudocumentapi/base.py | 33 ++++++ tableaudocumentapi/connection.py | 185 +++++++++++++++++++++++++------ tableaudocumentapi/datasource.py | 48 ++++---- tableaudocumentapi/relation.py | 138 +++++++++++++++++++++++ tableaudocumentapi/workbook.py | 52 ++++++--- 7 files changed, 389 insertions(+), 75 deletions(-) create mode 100644 pyproject.toml create mode 100644 tableaudocumentapi/base.py create mode 100644 tableaudocumentapi/relation.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b0471b7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta:__legacy__" \ No newline at end of file diff --git a/tableaudocumentapi/__init__.py b/tableaudocumentapi/__init__.py index 6a10f6f..ebe0365 100644 --- a/tableaudocumentapi/__init__.py +++ b/tableaudocumentapi/__init__.py @@ -1,7 +1,10 @@ +from .base import BaseObject from .field import Field -from .connection import Connection +from .relation import Relation +from .connection import Connection, NamedConnection, RelationParser from .datasource import Datasource, ConnectionParser from .workbook import Workbook + __version__ = '0.0.1' __VERSION__ = __version__ diff --git a/tableaudocumentapi/base.py b/tableaudocumentapi/base.py new file mode 100644 index 0000000..7dcc0fb --- /dev/null +++ b/tableaudocumentapi/base.py @@ -0,0 +1,33 @@ + + +class BaseObject: + + def _to_dict( + self, base_attrs=[], to_dict_attrs=[], + to_dict_list_attrs=[], to_dict_of_dict_attrs=[] + ): + base = { + k.replace('_', ''): getattr(self, k) for k in base_attrs + if getattr(self, k) + } + base.update( + { + k: getattr(self, k).to_dict() for k in to_dict_attrs + if getattr(self, k) + } + ) + base.update( + { + k: [i.to_dict() for i in getattr(self, k)] + for k in to_dict_list_attrs + if getattr(self, k) + } + ) + base.update( + { + i: {k:v.to_dict() for k, v in getattr(self, i).items()} + for i in to_dict_of_dict_attrs + if getattr(self, i) + } + ) + return base diff --git a/tableaudocumentapi/connection.py b/tableaudocumentapi/connection.py index 30343b5..18cb22a 100644 --- a/tableaudocumentapi/connection.py +++ b/tableaudocumentapi/connection.py @@ -1,25 +1,41 @@ import xml.etree.ElementTree as ET +from tableaudocumentapi import BaseObject, Relation from tableaudocumentapi.dbclass import is_valid_dbclass -class Connection(object): - """A class representing connections inside Data Sources.""" +class RelationParser(object): + """Parser for detecting and extracting relations from Connection entities.""" - def __init__(self, connxml): - """Connection is usually instantiated by passing in connection elements - in a Data Source. If creating a connection from scratch you can call - `from_attributes` passing in the connection attributes. + def __init__(self, connection_xml, version): + self._connxml = connection_xml + self._relversion = version - """ + def get_relations(self): + """Find and return all relations.""" + relations_xml = self._connxml.findall('./relation') + if relations_xml: + return list(map(Relation, relations_xml)) + else: + return None + + +class BaseConnection(BaseObject): + + def __init__(self, connxml, version=None): self._connectionXML = connxml + self._class = connxml.get('class') self._dbname = connxml.get('dbname') self._server = connxml.get('server') self._username = connxml.get('username') self._authentication = connxml.get('authentication') - self._class = connxml.get('class') - self._port = connxml.get('port', None) - self._query_band = connxml.get('query-band-spec', None) - self._initial_sql = connxml.get('one-time-sql', None) + self._port = connxml.get('port') + self._channel = connxml.get('channel') + self._dataserver_permissions = connxml.get('dataserver-permissions') + self._directory = connxml.get('directory') + self._server_oauth = connxml.get('server-oauth') + self._workgroup_auth_mode = connxml.get('workgroup-auth-mode') + self._query_band = connxml.get('query-band-spec') + self._initial_sql = connxml.get('one-time-sql') def __repr__(self): return "''".format(self._server, self._dbname, hex(id(self))) @@ -39,9 +55,31 @@ def from_attributes(cls, server, dbname, username, dbclass, port=None, query_ban xml.port = port xml.query_band = query_band xml.initial_sql = initial_sql - return xml + @property + def class_(self): + """The type of connection (e.g. 'MySQL', 'Postgresql'). A complete list + can be found in dbclass.py""" + return self._class + + @class_.setter + def class_(self, value): + """Set the connection's dbclass property. + + Args: + value: New dbclass value. String. + + Returns: + Nothing. + """ + + if not is_valid_dbclass(value): + raise AttributeError("'{}' is not a valid database type".format(value)) + + self._class = value + self._connectionXML.set('class', value) + @property def dbname(self): """Database name for the connection. Not the table name.""" @@ -106,29 +144,6 @@ def username(self, value): def authentication(self): return self._authentication - @property - def dbclass(self): - """The type of connection (e.g. 'MySQL', 'Postgresql'). A complete list - can be found in dbclass.py""" - return self._class - - @dbclass.setter - def dbclass(self, value): - """Set the connection's dbclass property. - - Args: - value: New dbclass value. String. - - Returns: - Nothing. - """ - - if not is_valid_dbclass(value): - raise AttributeError("'{}' is not a valid database type".format(value)) - - self._class = value - self._connectionXML.set('class', value) - @property def port(self): """Port used to connect to the database.""" @@ -181,6 +196,26 @@ def query_band(self, value): else: self._connectionXML.set('query-band-spec', value) + @property + def channel(self): + return self._channel + + @property + def dataserver_permissions(self): + return self._dataserver_permissions + + @property + def directory(self): + return self._directory + + @property + def server_oauth(self): + return self._server_oauth + + @property + def workgroup_auth_mode(self): + return self._workgroup_auth_mode + @property def initial_sql(self): """Initial SQL to be run.""" @@ -206,3 +241,83 @@ def initial_sql(self, value): pass else: self._connectionXML.set('one-time-sql', value) + + def base_dict(self): + base_attrs = [ + 'class_', 'dbname', 'server', 'username', + 'authentication', 'port', 'channel', 'dataserver_permissions', + 'directory', 'server_oauth', 'workgroup_auth_mode', + 'query_band', 'initial_sql' + ] + base = self._to_dict( + base_attrs=base_attrs + ) + return base + + +class Connection(BaseConnection): + + def __init__(self, connxml, version=None): + super().__init__(connxml, version=None) + self._named_connections = self._extract_named_connections() + self._relation_parser = RelationParser( + connxml, version=version + ) + self._relations = self._relation_parser.get_relations() + + def _extract_named_connections(self): + named_connections = [ + conn for conn in self._connectionXML.findall('./named-connections/named-connection') + ] + return {nc.name: nc for nc in list(map(NamedConnection, named_connections))} + + @property + def named_connections(self): + return self._named_connections + + @property + def relations(self): + return self._relations + + def to_dict(self): + base = super().base_dict() + to_dict_list_attrs = ['relations'] + to_dict_of_dict_attrs = ['named_connections'] + base.update( + self._to_dict( + to_dict_list_attrs=to_dict_list_attrs, + to_dict_of_dict_attrs=to_dict_of_dict_attrs + ) + ) + return base + + +class NamedConnection(BaseConnection): + """A class representing connections inside Data Sources.""" + + def __init__(self, connxml, version=None): + """Connection is usually instantiated by passing in connection elements + in a Data Source. If creating a connection from scratch you can call + `from_attributes` passing in the connection attributes. + + """ + assert connxml.tag == 'named-connection', "Must be of type named-connection" + super().__init__(connxml.find('./connection'), version=version) + self._name = connxml.get('name') + self._caption = connxml.get('caption') + + @property + def name(self): + return self._name + + @property + def caption(self): + return self._caption + + def to_dict(self): + base = super().base_dict() + base_attrs = ['name', 'caption'] + base.update( + self._to_dict(base_attrs=base_attrs) + ) + return base diff --git a/tableaudocumentapi/datasource.py b/tableaudocumentapi/datasource.py index 418dc53..85f0078 100644 --- a/tableaudocumentapi/datasource.py +++ b/tableaudocumentapi/datasource.py @@ -1,11 +1,11 @@ -import collections import itertools -import xml.etree.ElementTree as ET -import xml.sax.saxutils as sax +import functools +import collections from uuid import uuid4 +import xml.sax.saxutils as sax +import xml.etree.ElementTree as ET -from tableaudocumentapi import Connection, xfile -from tableaudocumentapi import Field +from tableaudocumentapi import BaseObject, Connection, Field, xfile from tableaudocumentapi.multilookup_dict import MultiLookupDict from tableaudocumentapi.xfile import xml_open @@ -33,6 +33,14 @@ def _is_used_by_worksheet(names, field): return any(y for y in names if y in field.worksheets) +def partialclass(cls, *args, **kwds): + + class NewCls(cls): + __init__ = functools.partialmethod(cls.__init__, *args, **kwds) + + return NewCls + + class FieldDictionary(MultiLookupDict): def used_by_sheet(self, name): @@ -94,28 +102,14 @@ class ConnectionParser(object): def __init__(self, datasource_xml, version): self._dsxml = datasource_xml self._dsversion = version - - def _extract_federated_connections(self): - connections = list(map(Connection, self._dsxml.findall('.//named-connections/named-connection/*'))) - # 'sqlproxy' connections (Tableau Server Connections) are not embedded into named-connection elements - # extract them manually for now - connections.extend(map(Connection, self._dsxml.findall("./connection[@class='sqlproxy']"))) - return connections - - def _extract_legacy_connection(self): - return list(map(Connection, self._dsxml.findall('connection'))) + self.Connection = partialclass(Connection, version=version) def get_connections(self): """Find and return all connections based on file format version.""" - - if float(self._dsversion) < 10: - connections = self._extract_legacy_connection() - else: - connections = self._extract_federated_connections() - return connections + return list(map(self.Connection, self._dsxml.findall('./connection'))) -class Datasource(object): +class Datasource(BaseObject): """A class representing Tableau Data Sources, embedded in workbook files or in TDS files. @@ -133,6 +127,7 @@ def __init__(self, dsxml, filename=None): 'formatted-name') # TDS files don't have a name attribute self._version = self._datasourceXML.get('version') self._caption = self._datasourceXML.get('caption', '') + self._inline = True if self._datasourceXML.get('inline', '') == 'true' else False self._connection_parser = ConnectionParser( self._datasourceXML, version=self._version) self._connections = self._connection_parser.get_connections() @@ -245,3 +240,12 @@ def _get_metadata_objects(self): def _get_column_objects(self): return [_column_object_from_column_xml(self._datasourceTree, xml) for xml in self._datasourceTree.findall('.//column')] + + def to_dict(self): + base_attrs = ['name', 'version', 'caption'] + to_dict_list_attrs = ['connections'] + base = self._to_dict( + base_attrs=base_attrs, + to_dict_list_attrs=to_dict_list_attrs + ) + return base diff --git a/tableaudocumentapi/relation.py b/tableaudocumentapi/relation.py new file mode 100644 index 0000000..cdd644e --- /dev/null +++ b/tableaudocumentapi/relation.py @@ -0,0 +1,138 @@ +from tableaudocumentapi import BaseObject + + +class Expression(BaseObject): + + def __init__(self, expxml): + self._expressionXML = expxml + self._op = expxml.get('op') + self._expressions = self._extract_expressions() or None + + def _extract_expressions(self): + return list(map(Expression, self._expressionXML.findall('./expression'))) + + @property + def op(self): + return self._op + + @property + def expressions(self): + return self._expressions + + def to_dict(self): + base = { + 'op': self.op + } + if self.expressions: + base['expressions'] = [exp.to_dict() for exp in self.expressions] + return base + + +class Clause(BaseObject): + + def __init__(self, clxml): + self._clauseXML = clxml + self._type = clxml.get('type') + self._expression = self._extract_expression() + + def _extract_expression(self): + expxml = self._clauseXML.find('./expression') + if expxml is not None: + return Expression(expxml) + else: + return None + + @property + def type(self): + return self._type + + @property + def expression(self): + return self._expression + + def to_dict(self): + return { + 'type': self.type, + 'expression': self.expression.to_dict() + } + + +class Relation(BaseObject): + """A class representing relations inside Connections.""" + + def __init__(self, relxml): + self._relationXML = relxml + self._type = relxml.get('type') + self._connection = relxml.get('connection') + self._name = relxml.get('name') + self._table = relxml.get('table') + self._text = self._extract_text() + self._clause = self._extract_clause() + self._relations = self._extract_relations() + + def _extract_clause(self): + clxml = self._relationXML.find('./clause') + if clxml is not None: + return Clause(clxml) + else: + return None + + def _extract_relations(self): + relxmls = self._relationXML.findall('./relation') + if relxmls: + return list(map(Relation, relxmls)) + else: + return None + + def _extract_text(self): + text = None + if self._relationXML.text: + if not self._relationXML.text.isspace(): + text = self._relationXML.text + return text + + @property + def type(self): + return self._type + + @property + def name(self): + return self._name + + @property + def connection(self): + return self._connection + + @property + def table(self): + return self._table + + @property + def text(self): + return self._text + + @property + def clause(self): + return self._clause + + @property + def relation(self): + return self._relations + + def _base_dict(self): + base_attrs = ['type', 'name', 'connection', 'table', 'text'] + return self._to_dict( + base_attrs=base_attrs + ) + + def to_dict(self): + to_dict_attrs = ['clause'] + to_dict_list_attrs = ['relation'] + base = self._base_dict() + base.update( + self._to_dict( + to_dict_attrs=to_dict_attrs, + to_dict_list_attrs=to_dict_list_attrs, + ) + ) + return base diff --git a/tableaudocumentapi/workbook.py b/tableaudocumentapi/workbook.py index 70b280c..9a62b07 100644 --- a/tableaudocumentapi/workbook.py +++ b/tableaudocumentapi/workbook.py @@ -1,11 +1,10 @@ import weakref - -from tableaudocumentapi import Datasource, xfile +from tableaudocumentapi import BaseObject, Datasource, xfile from tableaudocumentapi.xfile import xml_open -class Workbook(object): +class Workbook(BaseObject): """A class for writing Tableau workbook files.""" def __init__(self, filename): @@ -14,22 +13,34 @@ def __init__(self, filename): for access. """ - self._filename = filename - self._workbookTree = xml_open(self._filename, 'workbook') - self._workbookRoot = self._workbookTree.getroot() + self._source_platform = self._workbookRoot.get('source-platform') + self._source_build = self._workbookRoot.get('source-build') # prepare our datasource objects self._datasources = self._prepare_datasources( - self._workbookRoot) # self.workbookRoot.find('datasources') - - self._datasource_index = self._prepare_datasource_index(self._datasources) - + self._workbookRoot + ) # self.workbookRoot.find('datasources') + self._datasource_index = self._prepare_datasource_index( + self._datasources + ) self._worksheets = self._prepare_worksheets( self._workbookRoot, self._datasource_index ) + @property + def filename(self): + return self._filename + + @property + def source_platform(self): + return self._source_platform + + @property + def source_build(self): + return self._source_build + @property def datasources(self): return self._datasources @@ -38,10 +49,6 @@ def datasources(self): def worksheets(self): return self._worksheets - @property - def filename(self): - return self._filename - def save(self): """ Call finalization code and save file. @@ -74,9 +81,8 @@ def save_as(self, new_filename): @staticmethod def _prepare_datasource_index(datasources): retval = weakref.WeakValueDictionary() - for datasource in datasources: + for datasource in datasources.values(): retval[datasource.name] = datasource - return retval @staticmethod @@ -92,7 +98,7 @@ def _prepare_datasources(xml_root): ds = Datasource(datasource) datasources.append(ds) - return datasources + return {ds.name: ds for ds in datasources} @staticmethod def _prepare_worksheets(xml_root, ds_index): @@ -116,3 +122,15 @@ def _prepare_worksheets(xml_root, ds_index): datasource.fields[column_name].add_used_in(worksheet_name) return worksheets + + def to_dict(self): + base_attrs = [ + 'source_platform', 'filename', 'source_build', 'worksheets' + ] + to_dict_of_dict_attrs = ['datasources'] + base = self._to_dict( + base_attrs=base_attrs, + to_dict_of_dict_attrs=to_dict_of_dict_attrs + ) + base['datasource_index'] = list(self._datasource_index.keys()) + return base From fc12713c423a9da63aeaaa342765cba271f7cf6b Mon Sep 17 00:00:00 2001 From: kgpayne Date: Sun, 7 Feb 2021 14:32:15 +0000 Subject: [PATCH 2/2] Minor tweaks. (#2) * Added new relation entity and to_dict() methods. * More tidying. * Minor tweaks. --- tableaudocumentapi/connection.py | 8 ++++---- tableaudocumentapi/relation.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tableaudocumentapi/connection.py b/tableaudocumentapi/connection.py index 18cb22a..e924634 100644 --- a/tableaudocumentapi/connection.py +++ b/tableaudocumentapi/connection.py @@ -263,7 +263,7 @@ def __init__(self, connxml, version=None): self._relation_parser = RelationParser( connxml, version=version ) - self._relations = self._relation_parser.get_relations() + self._relation = self._relation_parser.get_relations() def _extract_named_connections(self): named_connections = [ @@ -276,12 +276,12 @@ def named_connections(self): return self._named_connections @property - def relations(self): - return self._relations + def relation(self): + return self._relation def to_dict(self): base = super().base_dict() - to_dict_list_attrs = ['relations'] + to_dict_list_attrs = ['relation'] to_dict_of_dict_attrs = ['named_connections'] base.update( self._to_dict( diff --git a/tableaudocumentapi/relation.py b/tableaudocumentapi/relation.py index cdd644e..7cd9ddd 100644 --- a/tableaudocumentapi/relation.py +++ b/tableaudocumentapi/relation.py @@ -68,7 +68,7 @@ def __init__(self, relxml): self._table = relxml.get('table') self._text = self._extract_text() self._clause = self._extract_clause() - self._relations = self._extract_relations() + self._relation = self._extract_relations() def _extract_clause(self): clxml = self._relationXML.find('./clause') @@ -117,7 +117,7 @@ def clause(self): @property def relation(self): - return self._relations + return self._relation def _base_dict(self): base_attrs = ['type', 'name', 'connection', 'table', 'text']