diff --git a/Makefile b/Makefile index e5defe4cf..72a500c44 100644 --- a/Makefile +++ b/Makefile @@ -24,9 +24,11 @@ datarootdir = $(prefix)/share docdir = $(datarootdir)/cppreference/doc bookdir = $(datarootdir)/devhelp/books +qhelpgenerator = qhelpgenerator + #Version -VERSION=20141118 +VERSION=20150809 #STANDARD RULES @@ -39,6 +41,7 @@ DISTFILES= \ ddg_parse_html.py \ devhelp2qch.xsl \ fix_devhelp-links.py \ + gen_chapter_index.py \ index2autolinker.py \ index2browser.py \ index2ddg.py \ @@ -47,12 +50,11 @@ DISTFILES= \ index2search.py \ index2highlight.py \ index_transform.py \ - index-chapters-c.xml \ - index-chapters-cpp.xml \ index-functions.README \ index-functions-c.xml \ index-functions-cpp.xml \ link_map.py \ + merge_devhelp.py \ preprocess.py \ preprocess.xsl \ preprocess-css.css \ @@ -126,47 +128,47 @@ release: all #WORKER RULES doc_html: output/reference -doc_devhelp: output/cppreference-doc-en-c.devhelp2 output/cppreference-doc-en-cpp.devhelp2 +doc_devhelp: output/cppreference-doc-en-c.devhelp2 output/cppreference-doc-en-cpp.devhelp2 output/complete.devhelp2 doc_qch: output/cppreference-doc-en-cpp.qch doc_doxygen: output/cppreference-doxygen-web.tag.xml output/cppreference-doxygen-local.tag.xml -#builds the title<->location map -output/link-map.xml: output/reference - ./build_link_map.py - #build the .devhelp2 index output/cppreference-doc-en-c.devhelp2: \ - output/reference \ - output/link-map.xml - ./index2devhelp.py $(docdir)/html index-chapters-c.xml \ + output/reference + ./gen_chapter_index.py --i output/reference/en/c.html --o output/index-chapters-c.xml + ./index2devhelp.py $(docdir)/html output/index-chapters-c.xml \ "C Standard Library reference" "cppreference-doc-en-c" "c" \ - index-functions-c.xml "output/devhelp-index-c.xml" - ./fix_devhelp-links.py "output/devhelp-index-c.xml" \ - "output/cppreference-doc-en-c.devhelp2" + index-functions-c.xml "output/cppreference-doc-en-c.devhelp2" + output/cppreference-doc-en-cpp.devhelp2: \ - 
output/reference \ - output/link-map.xml - ./index2devhelp.py $(docdir)/html index-chapters-cpp.xml \ + output/reference + ./gen_chapter_index.py --i output/reference/en/cpp.html --o output/index-chapters-cpp.xml + ./index2devhelp.py $(docdir)/html output/index-chapters-cpp.xml \ "C++ Standard Library reference" "cppreference-doc-en-cpp" "cpp" \ - index-functions-cpp.xml "output/devhelp-index-cpp.xml" - ./fix_devhelp-links.py "output/devhelp-index-cpp.xml" \ - "output/cppreference-doc-en-cpp.devhelp2" + index-functions-cpp.xml "output/cppreference-doc-en-cpp.devhelp2" + +output/complete.devhelp2: \ + output/reference output/cppreference-doc-en-c.devhelp2 output/cppreference-doc-en-cpp.devhelp2 + ./merge_devhelp.py \ + --c "output/cppreference-doc-en-c.devhelp2" \ + --cpp "output/cppreference-doc-en-cpp.devhelp2" \ + --out "output/complete.devhelp2" #build the .qch (QT help) file -output/cppreference-doc-en-cpp.qch: output/qch-help-project-cpp.xml +output/cppreference-doc-en-cpp.qch: output/complete.xml #qhelpgenerator only works if the project file is in the same directory as the documentation - cp "output/qch-help-project-cpp.xml" "output/reference/qch.xml" + cp "output/complete.xml" "output/reference/qch.xml" pushd "output/reference" > /dev/null; \ - qhelpgenerator "qch.xml" -o "../cppreference-doc-en-cpp.qch"; \ + $(qhelpgenerator) "qch.xml" -o "../cppreference-doc-en-cpp.qch"; \ popd > /dev/null rm -f "output/reference/qch.xml" -output/qch-help-project-cpp.xml: output/cppreference-doc-en-cpp.devhelp2 +output/complete.xml: output/complete.devhelp2 #build the file list echo "" > "output/qch-files.xml" @@ -178,20 +180,18 @@ output/qch-help-project-cpp.xml: output/cppreference-doc-en-cpp.devhelp2 echo "" >> "output/qch-files.xml" #create the project (copies the file list) - xsltproc devhelp2qch.xsl "output/cppreference-doc-en-cpp.devhelp2" > \ - "output/qch-help-project-cpp.xml" + xsltproc devhelp2qch.xsl "output/complete.devhelp2" > \ + "output/complete.xml" # 
build doxygen tag file output/cppreference-doxygen-local.tag.xml: \ - output/reference \ - output/link-map.xml + output/reference ./index2doxygen-tag.py "output/link-map.xml" \ "index-functions-cpp.xml" \ "output/cppreference-doxygen-local.tag.xml" output/cppreference-doxygen-web.tag.xml: \ - output/reference \ - output/link-map.xml + output/reference ./index2doxygen-tag.py web \ "index-functions-cpp.xml" \ "output/cppreference-doxygen-web.tag.xml" @@ -230,5 +230,9 @@ source: --timeout=180 --no-verbose \ --retry-connrefused --waitretry=1 --read-timeout=20 \ http://en.cppreference.com/w/ ; \ + wget --adjust-extension --page-requisites --convert-links \ + --timeout=180 --no-verbose \ + --retry-connrefused --waitretry=1 --read-timeout=20 \ + http://en.cppreference.com/w/Cppreference:FAQ ; \ popd > /dev/null diff --git a/build_link_map.py b/build_link_map.py index e2df03ea1..0fbe6f594 100755 --- a/build_link_map.py +++ b/build_link_map.py @@ -28,7 +28,7 @@ # returns a dict { title -> filename }. 
# directory - either 'output/reference' or 'reference' -def build_link_map(directory): +def build_link_map_impl(directory): # find all html files html_files = [] for root, dirnames, filenames in os.walk(directory): @@ -57,11 +57,14 @@ def build_link_map(directory): link_map.add_link(title, target) return link_map -def main(): - link_map = build_link_map('output/reference') +def build_link_map(): + link_map = build_link_map_impl('output/reference') # create an xml file containing mapping between page title and actual location link_map.write('output/link-map.xml') +def main(): + build_link_map() + if __name__ == "__main__": main() diff --git a/clean_faq.py b/clean_faq.py new file mode 100755 index 000000000..75abc2969 --- /dev/null +++ b/clean_faq.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2015 Michael Munzert +# +# This file is part of cppreference-doc +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. 
import os
import re
import shutil
import urllib.parse


def urldecode(s):
    """Return *s* with its %xx URL escapes decoded."""
    return urllib.parse.unquote(s)


def localize_faq_href(href):
    """Retarget a link at the local ``FAQ.html`` while keeping its fragment.

    Only hrefs that mention ``FAQ.html`` are touched; everything before the
    first ``#`` is replaced by ``FAQ.html`` and the ``#fragment`` (if any) is
    preserved. Other hrefs are returned unchanged.
    """
    if 'FAQ.html' not in href:
        return href
    # partition() splits at the first '#' only, so the anchor survives intact
    # (a global regex substitution would also rewrite the fragment).
    _, hash_sign, fragment = href.partition('#')
    return 'FAQ.html' + hash_sign + fragment


def clean_faq(output_path):
    """Rename the downloaded FAQ page and fix the links that refer to it.

    Works on files below ``<output_path>/reference/en``:

    * ``Cppreference:FAQ.html`` is moved to ``FAQ.html``;
    * in ``c.html`` and ``cpp.html`` the first link whose text is ``FAQ`` is
      pointed at ``FAQ.html``;
    * inside ``FAQ.html`` self-referencing links are localized and the
      navigation bar head is turned into a link to ``index.html``.

    Every touched page is re-serialized with ``prettify`` as UTF-8.
    """
    # Imported here so the pure helpers above stay usable without bs4.
    from bs4 import BeautifulSoup

    en_dir = os.path.join(output_path, 'reference/en')
    faq_path = os.path.join(en_dir, 'FAQ.html')

    # Fix FAQ links
    shutil.move(os.path.join(en_dir, 'Cppreference:FAQ.html'), faq_path)
    for page in ('c.html', 'cpp.html'):
        page_path = os.path.join(en_dir, page)
        with open(page_path, 'rb') as f:
            soup = BeautifulSoup(f.read(), "lxml")

        for link in soup.find_all('a'):
            if link.text == 'FAQ':
                link['href'] = 'FAQ.html'
                break

        with open(page_path, 'wb') as f:
            f.write(soup.prettify('utf-8'))

    # clean FAQ.html
    with open(faq_path, 'rb') as f:
        soup = BeautifulSoup(f.read(), "lxml")

    # href=True skips anchors without an href attribute.
    for link in soup.find_all('a', href=True):
        link['href'] = localize_faq_href(link['href'])

    # The navbar is optional: a page without it is still written back.
    navbar_head = soup.find(attrs={'class': 't-navbar-head'})
    if navbar_head is not None and navbar_head.a is not None:
        navbar_head.a.string = 'Index'
        navbar_head.a['href'] = 'index.html'
        navbar_head.a['title'] = 'index'
        if navbar_head.div is not None:
            navbar_head.div.extract()

    with open(faq_path, 'wb') as f:
        f.write(soup.prettify('utf-8'))
# version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.

# Generate the chapter index

import argparse
import re


def collapse_whitespace(text):
    """Strip *text* and squeeze each internal whitespace run to one space."""
    return re.sub(r"\s+", ' ', text.strip())


def main():
    """Build a chapter index XML file from an overview HTML page.

    Command line: ``--i <source html>`` and ``--o <output xml>`` (both
    required). Every ``<p>`` of the source page is scanned in document order:
    a ``<b>`` child that wraps a link starts a new top-level ``<sub>`` entry,
    and each following direct ``<a>`` child becomes a nested ``<sub>`` of the
    most recent top-level entry. Links are prefixed with ``en/``.
    """
    # Third-party parsers are imported here so the helper above stays
    # importable without them.
    from bs4 import BeautifulSoup
    from lxml import etree

    parser = argparse.ArgumentParser(description='Generate the chapter index')
    parser.add_argument('--i', help='source file', required=True)
    parser.add_argument('--o', help='output file', required=True)

    args = parser.parse_args()

    # Read as UTF-8 explicitly rather than with the locale's encoding.
    # NOTE(review): assumes the downloaded pages are UTF-8 -- confirm.
    with open(args.i, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), "lxml")

    # NOTE(review): the root literal was empty in the reviewed text and could
    # not be parsed; <chapters> is assumed as the root element -- confirm
    # against what index2devhelp.py expects.
    root = etree.Element("chapters")

    section = None  # most recent top-level entry; None until the first <b>
    for paragraph in soup.find_all('p'):
        for tag in paragraph:
            if tag.name == "b":
                anchor = tag.a
                # Headings without a usable link do not open a new section,
                # so no half-initialized <sub> is left in the tree.
                if anchor is None or not anchor.has_attr("href"):
                    continue
                section = etree.SubElement(root, "sub")
                section.set("name", collapse_whitespace(anchor.text))
                section.set("link", "en/" + anchor["href"])
            elif tag.name == "a":
                # A link before the first heading has no parent to attach to.
                if section is None or not tag.has_attr("href"):
                    continue
                child = etree.SubElement(section, "sub")
                child.set("name", collapse_whitespace(tag.text))
                child.set("link", "en/" + tag["href"])

    etree.ElementTree(root).write(args.o, pretty_print=True, encoding='utf-8')


if __name__ == "__main__":
    main()
#!/usr/bin/env python3

# Copyright (C) 2015 Michael Munzert
#
# This file is part of cppreference-doc
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see http://www.gnu.org/licenses/.

# Merge the c and the cpp devhelp files

import argparse
import datetime
from copy import deepcopy

# NOTE(review): the book template literal was empty in the reviewed text and
# could not be parsed. The attributes below are a reconstruction; only the
# single "{}" placeholder (filled with the build date) is established by the
# code -- confirm the remaining attributes before merging.
BOOK_TEMPLATE = (
    '<book title="C and C++ Standard Library reference" '
    'name="cppreference-doc-en" link="en/index.html" '
    'version="2" language="c++" date="{}"/>'
)


def qualify_function_link(link):
    """Turn a book-relative function link into ``en/<link>.html``."""
    return "en/" + link + ".html"


def _children_of(doc, tag):
    """Return the element children of the first *tag* section in *doc*.

    The lookup uses *doc*'s own default namespace (if it declares one), so
    each input document is searched with its own namespace. A missing section
    yields an empty list instead of an error.
    """
    ns = doc.nsmap.get(None)
    path = ".//{" + ns + "}" + tag if ns else ".//" + tag
    section = doc.find(path)
    if section is None:
        return []
    # Comments and processing instructions have a non-string tag; skip them.
    return [child for child in section if isinstance(child.tag, str)]


def _copy_chapters(doc, target):
    """Append copies of *doc*'s chapter entries to *target*."""
    for element in _children_of(doc, "chapters"):
        temp = deepcopy(element)
        temp.text = None  # drop the source's indentation text
        target.append(temp)


def _copy_functions(doc, target):
    """Append copies of *doc*'s function entries with qualified links."""
    for element in _children_of(doc, "functions"):
        temp = deepcopy(element)
        # Entries without a link are copied unchanged.
        if "link" in temp.attrib:
            temp.attrib["link"] = qualify_function_link(temp.attrib["link"])
        target.append(temp)


def main():
    """Merge the C and the C++ devhelp books into one combined book.

    Command line: ``--c``, ``--cpp`` (input devhelp files) and ``--out``
    (output file), all required. The output has a ``chapters`` section with
    one ``C`` and one ``C++`` entry that hold the chapters of the respective
    input, and a ``functions`` section with the functions of both inputs,
    C first, whose links are rewritten to ``en/<link>.html``.
    """
    # Imported here so the helpers above stay importable without lxml.
    from lxml import etree

    parser = argparse.ArgumentParser(description='Merge the c and the cpp devhelp files')
    parser.add_argument('--c', help='devhelp file for c', required=True)
    parser.add_argument('--cpp', help='devhelp file for c++', required=True)
    parser.add_argument('--out', help='output file', required=True)

    args = parser.parse_args()

    # Let lxml read the files itself: etree.XML() on a decoded str rejects
    # documents that carry an encoding declaration.
    doc_c = etree.parse(args.c).getroot()
    doc_cpp = etree.parse(args.cpp).getroot()

    timestamp = datetime.datetime.now().strftime('%Y-%m-%d')

    root = etree.XML(BOOK_TEMPLATE.format(timestamp))

    chapters = etree.SubElement(root, "chapters")
    chapters.attrib["xmlns"] = "http://www.devhelp.net/book"

    node_c = etree.SubElement(chapters, "sub")
    node_c.attrib["name"] = "C"
    node_c.attrib["link"] = "en/c.html"

    node_cpp = etree.SubElement(chapters, "sub")
    node_cpp.attrib["name"] = "C++"
    node_cpp.attrib["link"] = "en/cpp.html"

    node_fct = etree.SubElement(root, "functions")

    _copy_chapters(doc_c, node_c)
    _copy_chapters(doc_cpp, node_cpp)

    _copy_functions(doc_c, node_fct)
    _copy_functions(doc_cpp, node_fct)

    etree.ElementTree(root).write(args.out, pretty_print=True, encoding='utf-8')


if __name__ == "__main__":
    main()
-61,7 +65,7 @@ if (os.path.isdir(src_data_path)): # the skin files should be the same for all languages thus we # can merge everything - os.system('cp -rl ' + src_data_path + '/* ' + data_path) + os.system('cp -r ' + src_data_path + '/* ' + data_path) os.system('rm -r ' + src_data_path) # also copy the custom fonts @@ -125,6 +129,12 @@ def rename_file(root, fn, new_fn): rename_file('output/reference/en/cpp/numeric/math', 'NAN.html', 'NAN.2.html') rename_file('output/reference/en/c/numeric/math', 'NAN.html', 'NAN.2.html') +# clean FAQ +clean_faq('output') + +# generate link map as long as there is all information present +build_link_map() + # find files that need to be preprocessed html_files = [] for root, dirnames, filenames in os.walk('output/reference/'): @@ -132,47 +142,78 @@ def rename_file(root, fn, new_fn): html_files.append(os.path.join(root, filename)) #temporary fix -r1 = re.compile('', re.MULTILINE) +# r1 = re.compile('', re.MULTILINE) # fix links to files in rename_map rlink = re.compile('((?:src|href)=")([^"]*)(")') +html_comment = re.compile("") + def rlink_fix(match): pre = match.group(1) target = match.group(2) post = match.group(3) - target = xml_unescape(target) - target = urllib.parse.unquote(target) - for fn,new_fn in rename_map: - target = target.replace(fn, new_fn) - target = target.replace('../../upload.cppreference.com/mwiki/','../common/') - target = target.replace('../mwiki/','../common/') - target = re.sub('(\.php|\.css)\?.*', '\\1', target) - target = urllib.parse.quote(target) - target = xml_escape(target) + if "http://" not in target: + target = xml_unescape(target) + target = urllib.parse.unquote(target) + for fn,new_fn in rename_map: + target = target.replace(fn, new_fn) + target = target.replace('../../upload.cppreference.com/mwiki/','../common/') + target = target.replace('../mwiki/','../common/') + target = re.sub('(\.php|\.css)\?.*', '\\1', target) + target = urllib.parse.quote(target) + target = xml_escape(target) target = 
target.replace('%23', '#'); return pre + target + post + # clean the html files -for fn in html_files: - f = open(fn, "r") - text = f.read() - f.close() +bad_classes = {"noprint", "editsection", "printfooter", "catlinks"} - text = r1.sub('', text); - text = rlink.sub(rlink_fix, text) +count = len(html_files) +for idx, fn in enumerate(html_files): + if idx % 50 == 0: + print("{} of {}".format(idx, count)) + with open(fn, 'r') as f: + text = f.read() - f = open(fn, "w") - f.write(text) - f.close() + # text = r1.sub('', text); + text = html_comment.sub('', text); + text = rlink.sub(rlink_fix, text) + soup = BeautifulSoup(text, "lxml") + + bad = [] + for tag in soup(): + tag_name = tag.name + if tag_name == "script": + bad.append(tag) + elif tag_name == "link" and not "stylesheet" in tag["rel"]: + bad.append(tag) + elif tag_name == "style": + try: + if "text/css" not in tag["type"]: + bad.append(tag) + except KeyError: + bad.append(tag) + elif tag_name == "meta" and tag.has_attr("content"): + bad.append(tag) + elif tag_name == "a" and tag.has_attr("title") and tag["title"] == "About this image": + bad.append(tag.parent) + elif tag_name == "table" and tag.has_attr("id") and tag["id"] == "toc": + bad.append(tag) + else: + try: + if len(bad_classes.intersection(tag["class"])): + bad.append(tag) + except KeyError: + pass + + [s.extract() for s in bad] + + with open(fn, 'w') as f: + f.write(soup.prettify()) - tmpfile = fn + '.tmp'; - ret = os.system('xsltproc --novalid --html --encoding UTF-8 preprocess.xsl "' + fn + '" > "' + tmpfile + '"') - if ret != 0: - print("FAIL: " + fn) - continue - os.system('mv "' + tmpfile + '" "' + fn + '"') # append css modifications