Source code for ged2doc.html_writer

"""Module which produces HTML output.
"""

__all__ = ["HtmlWriter"]

import base64
import io
import logging
import pkg_resources
import string
from PIL import Image
from html import escape as html_escape

from ged4py import model
from .ancestor_tree import AncestorTree
from .ancestor_tree_svg import SVGTreeVisitor
from .size import Size
from . import utils
from . import writer


_log = logging.getLogger(__name__)


def TR(x):
    """This is no-op function, only used to mark translatable strings,
    to extract all strings run ``pygettext -k TR ...``
    """
    return x  # NOQA


[docs]class HtmlWriter(writer.Writer): """Transforms GEDCOM file into nicely formatted HTML page. This is a sub-class of `~ged2doc.writer.Writer` class providing implementation for rendering methods which transform GEDCOM info into HTML constructs. Constructor takes a large number of arguments which configure appearance of the resulting HTML page. After instantiating an object of this type one has to call `~ged2doc.writer.Writer.save()` method to produce output file. Parameters ---------- flocator : `ged2doc.input.FileLocator` File locator instance. output : `str` or `io.TextIOBase` Name for the output file or file object. tr : `ged2doc.i18n.I18N` Object supporting translation. encoding : `str`, optional GEDCOM file encoding, if ``None`` then encoding is determined from file itself. encoding_errors : `str`, optional Controls error handling behavior during string decoding, one of "strict" (default), "ignore", or "replace". sort_order : `ged4py.model.NameOrder`, optional Determines ordering of person in output file, one of the constants defined in `ged4py.model.NameOrder` enum. name_fmt : `int`, optional Bit mask with flags from `ged2doc.name` module. make_images : `bool`, optional If ``True`` (default) then generate images for persons. make_stat : `bool`, optional If ``True`` (default) then generate statistics section. make_toc : `bool`, optional If ``True`` (default) then generate Table of Contents. events_without_dates : `bool`, optional If ``True`` (default) then show events that have no associated dates. page_width : `ged2doc.size.Size` Width of the produced HTML page. image_width : `ged2doc.size.Size` Size of the images. image_height : `ged2doc.size.Size` Size of the images. image_upscale : `bool` If True then smaller images will be re-scaled to extend to image size. tree_width : `int` Number of generations in ancestor tree. """ def __init__(self, flocator, output, tr, encoding=None, encoding_errors="strict", sort_order=model.NameOrder.SURNAME_GIVEN, name_fmt=0, make_images=True, make_stat=True, make_toc=True, events_without_dates=True, page_width="800px", image_width="300px", image_height="300px", image_upscale=False, tree_width=4): writer.Writer.__init__(self, flocator, tr, encoding=encoding, encoding_errors=encoding_errors, sort_order=sort_order, name_fmt=name_fmt, make_images=make_images, make_stat=make_stat, make_toc=make_toc, events_without_dates=events_without_dates) self._page_width = Size(page_width) self._image_width = Size(image_width) self._image_height = Size(image_height) self._image_upscale = image_upscale self._tree_width = tree_width if hasattr(output, 'write'): self._output = output self._close = False else: self._output = open(output, 'wb') self._close = True self._toc = []
[docs] def _render_prolog(self): # docstring inherited from base class doc = ['<!DOCTYPE html>'] doc += ['<html>', '<head>'] doc += ['<meta http-equiv="Content-Type" content="text/html;' ' charset=utf-8">\n'] doc += ['<title>', 'Family Tree', '</title>\n'] d = dict(page_width=self._page_width ^ 'px') style = pkg_resources.resource_string(__name__, "data/styles/default") style = style.decode('utf-8') doc += [string.Template(style).substitute(d)] doc += ['</head>\n', '<body>\n'] doc += ['<div id="contents_div"/>\n'] for line in doc: self._output.write(line.encode('utf-8'))
[docs] def _interpolate(self, text): """Takes text with embedded references and returns properly escaped text with HTML links. Parameters ---------- text : `str` Arbitrary text with references. Returns ------- html : `str` HTML as text. """ result = "" for piece in utils.split_refs(text): if isinstance(piece, tuple): xref, name = piece result += '<a href="#{0}">{1}</a>'.format(html_escape(xref), html_escape(name)) else: result += html_escape(piece) return result
[docs] def _render_section(self, level, ref_id, title, newpage=False): # docstring inherited from base class self._toc += [(level, ref_id, title)] doc = ['<h{0} id="{1}">{2}</h{0}>\n'.format(level, ref_id, html_escape(title))] for line in doc: self._output.write(line.encode('utf-8'))
[docs] def _render_person(self, person, image_data, attributes, families, events, notes): # docstring inherited from base class doc = [] # image if present if image_data: img = self._get_image_fragment(image_data) if img: doc += [img] # all attributes follow for attr, value in attributes: doc += ['<p>' + self._interpolate(attr) + ": " + self._interpolate(value) + '</p>\n'] if families: hdr = self._tr.tr(TR("Spouses and children"), person.sex) doc += ['<h3>' + html_escape(hdr) + '</h3>\n'] for family in families: family = self._interpolate(family) doc += ['<p>' + family + '</p>\n'] if events: hdr = self._tr.tr(TR("Events and dates")) doc += ['<h3>' + html_escape(hdr) + '</h3>\n'] for date, facts in events: facts = self._interpolate(facts) doc += ['<p>' + html_escape(date) + ": " + facts + '</p>\n'] if notes: hdr = self._tr.tr(TR("Comments")) doc += ['<h3>' + html_escape(hdr) + '</h3>\n'] for note in notes: note = self._interpolate(note) doc += ['<p>' + note + '</p>\n'] # plot ancestors tree doc += self._make_ancestor_tree(person) for line in doc: self._output.write(line.encode('utf-8'))
[docs] def _render_name_stat(self, n_total, n_females, n_males): # docstring inherited from base class doc = [] doc += ['<p>%s: %d</p>' % (self._tr.tr(TR('Person count')), n_total)] doc += ['<p>%s: %d</p>' % (self._tr.tr(TR('Female count')), n_females)] doc += ['<p>%s: %d</p>' % (self._tr.tr(TR('Male count')), n_males)] for line in doc: self._output.write(line.encode('utf-8'))
[docs] def _render_name_freq(self, freq_table): # docstring inherited from base class def _gencouples(namefreq): halflen = (len(namefreq) + 1) // 2 for i in range(halflen): n1, c1 = namefreq[2 * i] n2, c2 = None, None if 2 * i + 1 < len(namefreq): n2, c2 = namefreq[2 * i + 1] yield n1, c1, n2, c2 total = float(sum(count for _, count in freq_table)) tbl = ['<table class="statTable">\n'] for name1, count1, name2, count2 in _gencouples(freq_table): tbl += ['<tr>\n'] tbl += ['<td width="25%">{0}</td>'.format(name1 or '-')] tbl += ['<td width="20%">{0} ({1:.1%})</td>'.format( count1, count1 / total)] if count2 is not None: tbl += ['<td width="25%">{0}</td>'.format(name2 or '-')] tbl += ['<td width="20%">{0} ({1:.1%})</td>'.format( count2, count2 / total)] tbl += ['</tr>\n'] tbl += ['</table>\n'] for line in tbl: self._output.write(line.encode('utf-8'))
[docs] def _render_toc(self): # docstring inherited from base class section = self._tr.tr(TR("Table Of Contents")) doc = ['<h1>{0}</h1>\n'.format(html_escape(section))] lvl = 0 for toclvl, tocid, text in self._toc: while lvl < toclvl: doc += ['<ul>'] lvl += 1 while lvl > toclvl: doc += ['</ul>'] lvl -= 1 doc += ['<li><a href="#{0}">{1}</a></li>\n'.format(tocid, text)] while lvl > 0: doc += ['</ul>'] lvl -= 1 for line in doc: self._output.write(line.encode('utf-8'))
[docs] def _finalize(self): # docstring inherited from base class if self._close: self._output.close()
[docs] def _get_image_fragment(self, image_data): """Returns <img> HTML fragment for given image data (byte array). Parameters ---------- image_data : `bytes` Image data. Returns ------- html : `str` HTML text containing image. """ try: imgfile = io.BytesIO(image_data) img = Image.open(imgfile) except Exception as exc: # PIL could fail for any reason, no chance to know, # just log an error and ignore this image _log.error("error while loading image: %s", exc) return None maxsize = (self._image_width.px, self._image_height.px) newimg = utils.img_resize(img, maxsize) if newimg is img: # means size was not changed and image is smaller # than box, we may want to extend it imgsize = "" if self._image_upscale: extend = utils.resize(img.size, maxsize, False) imgsize = ' width="{}" height="{}"'.format(*extend) # reuse original image data tag = '<img class="personImage"{imgsize} '\ 'src="data:{mime};base64,{data}"/>' data = base64.b64encode(image_data).decode('ascii') return tag.format(mime=utils.img_mime_type(img), data=data, imgsize=imgsize) else: # new image, need to convert it to bytes imgfile = io.BytesIO() mimetype = utils.img_save(newimg, imgfile) if mimetype: tag = '<img class="personImage" '\ 'src="data:{mime};base64,{data}"/>' data = base64.b64encode(imgfile.getvalue()).decode('ascii') return tag.format(mime=mimetype, data=data)
[docs] def _make_ancestor_tree(self, person): """Make SVG picture for parent tree. Parameters ---------- person : `ged4py.model.Individual` INDI record Returns ------- html : `list` [ `str` ] SVG data (HTML contents), list of strings. """ width = self._page_width ^ 'px' tree = AncestorTree(person, max_gen=self._tree_width, width=width, gen_dist="12pt", font_size="9pt") visitor = SVGTreeVisitor(units='px', fullxml=False) tree.visit(visitor) img = visitor.makeSVG(width=tree.width, height=tree.height) doc = [] if img is not None: tree_svg = img[0] hdr = self._tr.tr(TR("Ancestor tree")) doc += ['<h3>' + html_escape(hdr) + '</h3>\n'] doc += ['<div class="centered">\n'] doc += [tree_svg] doc += ['</div>\n'] else: doc += ['<svg width="100%" height="1pt"/>\n'] return doc