diff --git a/.gitignore b/.gitignore index e24445137..030b86cd0 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ _scratch/ Session.vim /.tox/ +/.idea diff --git a/docs/dev/analysis/features/text/hyperlink.rst b/docs/dev/analysis/features/text/hyperlink.rst new file mode 100644 index 000000000..aa8788da3 --- /dev/null +++ b/docs/dev/analysis/features/text/hyperlink.rst @@ -0,0 +1,301 @@ + +Hyperlink +========= + +Word allows hyperlinks to be placed in a document. + +The target of a hyperlink may be external, such as a web site, or internal, +to another location in the document. + +A hyperlink can contain multiple runs of text, each with its own distinct +text formatting (font). + + +Candidate protocol +------------------ + +An external hyperlink has an address and an optional anchor. An internal +hyperlink has only an anchor. + +.. highlight:: python + +**Add the external hyperlink** `http://us.com#about`:: + + >>> hyperlink = paragraph.add_hyperlink('About', address='http://us.com', anchor='about') + >>> hyperlink + + >>> hyperlink.text + 'About' + >>> hyperlink.address + 'http://us.com' + >>> hyperlink.anchor + 'about' + +**Add an internal hyperlink (to a bookmark)**:: + + >>> hyperlink = paragraph.add_hyperlink('Section 1', anchor='Section_1') + >>> hyperlink.text + 'Section 1' + >>> hyperlink.anchor + 'Section_1' + >>> hyperlink.address + None + +**Modify hyperlink properties**:: + + >>> hyperlink.text = 'Froogle' + >>> hyperlink.text + 'Froogle' + >>> hyperlink.address = 'mailto:info@froogle.com?subject=sup dawg?' 
+ >>> hyperlink.address + 'mailto:info@froogle.com?subject=sup%20dawg%3F' + >>> hyperlink.anchor = None + >>> hyperlink.anchor + None + +**Add additional runs to a hyperlink**:: + + >>> hyperlink.text = 'A ' + >>> # .insert_run inserts a new run at idx, defaults to idx=-1 + >>> hyperlink.insert_run(' link').bold = True + >>> hyperlink.insert_run('formatted', idx=1).bold = True + >>> hyperlink.text + 'A formatted link' + >>> [r for r in hyperlink.iter_runs()] + [, + , + ] + +**Iterate over the run-level items a paragraph contains**:: + + >>> paragraph = document.add_paragraph('A paragraph having a link to: ') + >>> paragraph.add_hyperlink(text='github', address='http://github.com') + >>> [item for item in paragraph.iter_run_level_items()] + [, ] + +**Paragraph.text now includes text contained in a hyperlink**:: + + >>> paragraph.text + 'A paragraph having a link to: github' + + +Word Behaviors +-------------- + +* What are the semantics of the w:history attribute on w:hyperlink? I + suspect this indicates whether the link should show up blue (unvisited) + or purple (visited). I'm inclined to think we need that as a read/write + property on hyperlink. We should see what the MS API does on this count. + +* We probably need to enforce some character-set restrictions on w:anchor. + Word doesn't seem to like spaces or hyphens, for example. The simple type + ST_String doesn't look like it takes care of this. + +* We'll need to test URL escaping of special characters like spaces and + question marks in Hyperlink.address. + +* What does Word do when loading a document containing an internal hyperlink + having an anchor value that doesn't match an existing bookmark? We'll want + to know because we're sure to get support inquiries from folks who don't + match those up and wonder why they get a repair error or whatever. + + +Specimen XML +------------ + +.. 
highlight:: xml + + +External links +~~~~~~~~~~~~~~ + +The address (URL) of an external hyperlink is stored in the document.xml.rels +file, keyed by the w:hyperlink@r:id attribute:: + + + + This is an external link to + + + + + + + Google + + + + +... mapping to relationship in document.xml.rels:: + + + + + +A hyperlink can contain multiple runs of text (and a whole lot of other +stuff, including nested hyperlinks, at least as far as the schema indicates):: + + + + + + + + A hyperlink containing an + + + + + + + italicized + + + + + + word + + + + + +Internal links +~~~~~~~~~~~~~~ + +An internal link provides "jump to another document location" behavior in the +Word UI. An internal link is distinguished by the absence of an r:id +attribute. In this case, the w:anchor attribute is required. The value of the +anchor attribute is the name of a bookmark in the document. + +Example:: + + + + See + + + + + + + Section 4 + + + + for more details. + + + +... referring to this bookmark elsewhere in the document:: + + + + + Section 4 + + + + + +Schema excerpt +-------------- + +.. 
highlight:: xml + +:: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/dev/analysis/features/text/index.rst b/docs/dev/analysis/features/text/index.rst index 2fff03924..c87732c1c 100644 --- a/docs/dev/analysis/features/text/index.rst +++ b/docs/dev/analysis/features/text/index.rst @@ -13,3 +13,4 @@ Text underline run-content breaks + hyperlink diff --git a/docx/__init__.py b/docx/__init__.py index 59756c021..30e46bdcb 100644 --- a/docx/__init__.py +++ b/docx/__init__.py @@ -2,7 +2,7 @@ from docx.api import Document # noqa -__version__ = "0.8.11" +__version__ = "0.8.11.3" # register custom Part classes with opc package reader diff --git a/docx/oxml/__init__.py b/docx/oxml/__init__.py index 093c1b45b..abc26e3f4 100644 --- a/docx/oxml/__init__.py +++ b/docx/oxml/__init__.py @@ -226,6 +226,11 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): CT_Ind, CT_Jc, CT_PPr, + CT_Border, + CT_BorderTop, + CT_BorderLeft, + CT_BorderBottom, + CT_BorderRight, CT_Spacing, CT_TabStop, CT_TabStops, @@ -236,6 +241,12 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None): register_element_cls('w:keepNext', CT_OnOff) register_element_cls('w:pageBreakBefore', CT_OnOff) register_element_cls('w:pPr', CT_PPr) +register_element_cls('w:pBdr', CT_Border) +register_element_cls('w:top', CT_BorderTop) +register_element_cls('w:left', CT_BorderLeft) +register_element_cls('w:bottom', CT_BorderBottom) +register_element_cls('w:right', CT_BorderRight) +register_element_cls('w:pPr', CT_PPr) register_element_cls('w:pStyle', CT_String) register_element_cls('w:spacing', CT_Spacing) register_element_cls('w:tab', CT_TabStop) diff --git a/docx/oxml/text/parfmt.py b/docx/oxml/text/parfmt.py index 466b11b1b..840e75ade 100644 --- a/docx/oxml/text/parfmt.py +++ b/docx/oxml/text/parfmt.py @@ -8,7 +8,7 @@ WD_ALIGN_PARAGRAPH, WD_LINE_SPACING, 
WD_TAB_ALIGNMENT, WD_TAB_LEADER ) from ...shared import Length -from ..simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure +from ..simpletypes import ST_SignedTwipsMeasure, ST_TwipsMeasure, ST_String, XsdInt from ..xmlchemy import ( BaseOxmlElement, OneOrMore, OptionalAttribute, RequiredAttribute, ZeroOrOne @@ -53,6 +53,7 @@ class CT_PPr(BaseOxmlElement): pageBreakBefore = ZeroOrOne('w:pageBreakBefore', successors=_tag_seq[4:]) widowControl = ZeroOrOne('w:widowControl', successors=_tag_seq[6:]) numPr = ZeroOrOne('w:numPr', successors=_tag_seq[7:]) + border = ZeroOrOne('w:pBdr', successors=_tag_seq[9:]) tabs = ZeroOrOne('w:tabs', successors=_tag_seq[11:]) spacing = ZeroOrOne('w:spacing', successors=_tag_seq[22:]) ind = ZeroOrOne('w:ind', successors=_tag_seq[23:]) @@ -60,6 +61,23 @@ class CT_PPr(BaseOxmlElement): sectPr = ZeroOrOne('w:sectPr', successors=_tag_seq[35:]) del _tag_seq + def _insert_border(self, border): + self.insert(0, border) + return border + + # @property + # def border_top(self): + # border = self.border + # if border is None: + # return None + # return border.top + # + # @border_top.setter + # def border_top(self, value): + # border = self.get_or_add_border() + # border.border_top = value + # + @property def first_line_indent(self): """ @@ -346,3 +364,111 @@ def insert_tab_in_order(self, pos, align, leader): return new_tab self.append(new_tab) return new_tab + +class CT_Border(BaseOxmlElement): + + _border_tag_seq = ( + 'w:top', 'w:left', 'w:bottom', 'w:right' + ) + + top = ZeroOrOne('w:top', successors=()) + left = ZeroOrOne('w:left', successors=()) + bottom = ZeroOrOne('w:bottom', successors=()) + right = ZeroOrOne('w:right', successors=()) + + # @property + # def border_top(self): + # top = self.top + # if top is None: + # return None + # return top + # + # @border_top.setter + def border_top(self, size, space, type): + # if value is None and self.top is None: + # return + top = self.get_or_add_top() + top.type = type + top.size = size + 
top.space = space + top.color = 'auto' + + # @property + # def border_left(self): + # left = self.left + # if left is None: + # return None + # return left + # + # @border_left.setter + def border_left(self, size, space, type): + # if value is None and self.left is None: + # return + left = self.get_or_add_left() + left.type = type + left.size = size + left.space = space + left.color = 'auto' + + # @property + # def border_bottom(self): + # bottom = self.bottom + # if bottom is None: + # return None + # return bottom + # + # @border_bottom.setter + def border_bottom(self, size, space, type): + # if value is None and self.bottom is None: + # return + bottom = self.get_or_add_bottom() + bottom.type = type + bottom.size = size + bottom.space = space + bottom.color = 'auto' + + # @property + # def border_right(self): + # right = self.right + # if right is None: + # return None + # return right + # + # @border_right.setter + def border_right(self, size, space, type): + # if value is None and self.right is None: + # return + right = self.get_or_add_right() + right.type = type + right.size = size + right.space = space + right.color = 'auto' + + +class CT_BorderTop(BaseOxmlElement): + + type = OptionalAttribute('w:val', ST_String) + size = OptionalAttribute('w:sz', XsdInt) + space = OptionalAttribute('w:space', XsdInt) + color = OptionalAttribute('w:color', ST_String) + +class CT_BorderLeft(BaseOxmlElement): + + type = OptionalAttribute('w:val', ST_String) + size = OptionalAttribute('w:sz', XsdInt) + space = OptionalAttribute('w:space', XsdInt) + color = OptionalAttribute('w:color', ST_String) + +class CT_BorderBottom(BaseOxmlElement): + + type = OptionalAttribute('w:val', ST_String) + size = OptionalAttribute('w:sz', XsdInt) + space = OptionalAttribute('w:space', XsdInt) + color = OptionalAttribute('w:color', ST_String) + +class CT_BorderRight(BaseOxmlElement): + + type = OptionalAttribute('w:val', ST_String) + size = OptionalAttribute('w:sz', XsdInt) + space = 
OptionalAttribute('w:space', XsdInt) + color = OptionalAttribute('w:color', ST_String) diff --git a/docx/text/parfmt.py b/docx/text/parfmt.py index 37206729c..d24b30f30 100644 --- a/docx/text/parfmt.py +++ b/docx/text/parfmt.py @@ -39,6 +39,54 @@ def alignment(self, value): pPr = self._element.get_or_add_pPr() pPr.jc_val = value + # @property + # def border(self): + # pPr = self._element.pPr + # if pPr is None: + # return None + # return pPr.border + # + # @border.setter + # def border(self, value): + # pPr = self._element.get_or_add_pPr() + # pPr.border = value + + # @property + # def border(self): + # pPr = self._element.pPr + # if pPr is None: + # return None + # return pPr.border + # + # @border.setter + def border(self, size=4, space=0, type='single'): + pPr = self._element.get_or_add_pPr() + border = pPr.get_or_add_border() + border.border_top(size, space, type) + border.border_left(size, space, type) + border.border_bottom(size, space, type) + border.border_right(size, space, type) + + def border_top(self, size=4, space=0, type='single'): + pPr = self._element.get_or_add_pPr() + border = pPr.get_or_add_border() + border.border_top(size, space, type) + + def border_left(self, size=4, space=0, type='single'): + pPr = self._element.get_or_add_pPr() + border = pPr.get_or_add_border() + border.border_left(size, space, type) + + def border_bottom(self, size=4, space=0, type='single'): + pPr = self._element.get_or_add_pPr() + border = pPr.get_or_add_border() + border.border_bottom(size, space, type) + + def border_right(self, size=4, space=0, type='single'): + pPr = self._element.get_or_add_pPr() + border = pPr.get_or_add_border() + border.border_right(size, space, type) + @property def first_line_indent(self): """ diff --git a/setup.py b/setup.py index 7c34edcca..4e42d08e3 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ def text_of(relpath): KEYWORDS = "docx office openxml word" AUTHOR = "Steve Canny" AUTHOR_EMAIL = "python-docx@googlegroups.com" -URL = 
"https://github.com/python-openxml/python-docx" +URL = "https://github.com/phgrigorio/python-docx" LICENSE = text_of("LICENSE") PACKAGES = find_packages(exclude=["tests", "tests.*"]) PACKAGE_DATA = {"docx": ["templates/*.xml", "templates/*.docx"]}