Source code for ged2doc.writer

"""Module which defines base class for all writer classes.
"""

__all__ = ["Writer"]

import abc
import locale
import logging

from .events import indi_attributes, indi_events, family_events
from .name import name_fmt

from . import utils
from ged4py import model, parser
from ged4py.date import DateValue


_log = logging.getLogger(__name__)


def TR(x):
    """This is no-op function, only used to mark translatable strings,
    to extract all strings run ``pygettext -k TR ...``
    """
    return x  # NOQA


def _spouse(person, fam):
    """Returns person spouse in a given family
    """
    # list of Pointers
    spouses = fam.sub_tags("HUSB", "WIFE", follow=False)
    spouses = [rec for rec in spouses
               if rec.value != person.xref_id]
    # more than one spouse is odd (from the structural concern)
    if spouses:
        return spouses[0].ref
    return None


[docs]class Writer(metaclass=abc.ABCMeta):
    """Base class for document writers.

    This class knows how to extract all relevant information from GEDCOM data
    and convert it into output document. It defines basic structure of the
    produced document (sequence of section and sub-sections) and it depends
    on the subclasses to implement specific rendering of output information
    into document-specific format. Subclasses will need to implement small set
    of methods (see _render methods below).

    Parameters
    ----------
    flocator : `ged2doc.input.FileLocator`
        File locator instance.
    tr : `ged2doc.i18n.I18N`
        Object supporting translation.
    encoding : `str`, optional
        GEDCOM file encoding, if ``None`` then encoding is determined from
        file itself.
    encoding_errors : `str`, optional
        Controls error handling behavior during string decoding, one of
        "strict" (default), "ignore", or "replace".
    sort_order : `ged4py.model.NameOrder`, optional
        Determines ordering of person in output file, one of the constants
        defined in `ged4py.model.NameOrder` enum.
    name_fmt : `int`, optional
        Bit mask with flags from `ged2doc.name` module.
    make_images : `bool`, optional
        If ``True`` (default) then generate images for persons.
    make_stat : `bool`, optional
        If ``True`` (default) then generate statistics section.
    make_toc : `bool`, optional
        If ``True`` (default) then generate Table of Contents.
    events_without_dates : `bool`, optional
        If ``True`` (default) then show events that have no associated dates.
    """
    def __init__(self, flocator, tr, encoding=None, encoding_errors="strict",
                 sort_order=model.NameOrder.SURNAME_GIVEN, name_fmt=0,
                 make_images=True, make_stat=True, make_toc=True,
                 events_without_dates=True):
        self._floc = flocator
        self._encoding = encoding
        self._encoding_errors = encoding_errors
        self._sort_order = sort_order
        self._name_fmt = name_fmt
        self._make_images = make_images
        self._make_stat = make_stat
        self._make_toc = make_toc
        self._events_without_dates = events_without_dates
        self._tr = tr

[docs]    def save(self):
        """Produce output document.

        This is the main (and the only one client-callable) method of the
        writers, it will parse GEDCOM structure and produce output document
        from it.
        """
        gfile = self._floc.open_gedcom()
        if not gfile:
            raise OSError("Failed to locate input file")

        reader = parser.GedcomReader(gfile, encoding=self._encoding,
                                     errors=self._encoding_errors)

        # generate starting sequence
        self._render_prolog()

        # title page
        title = self._tr.tr(TR("Person List"))
        self._render_section(1, 'personList', title)

        # Index of all INDI records
        _log.debug('Scan all INDI records')

        # filter out some fake records that some apps add
        indis = []
        for indi in reader.records0('INDI'):
            if indi.sub_tag_value("_UID") == "Unassociated photos":
                continue
            indis.append(indi)

        # loop over all individuals
        indis.sort(key=self._indi_sort_key)
        for person in indis:

            name = name_fmt(person.name, self._name_fmt)

            person_id = "person." + person.xref_id
            self._render_section(2, person_id, name, True)

            _log.debug('Found INDI: %s', person)
            _log.debug('INDI name: %r', name)

            image_data = self._make_main_image(person)

            attributes = []

            # birth date and place
            born = []
            bday = person.sub_tag("BIRT/DATE")
            if bday:
                born += [self._tr.tr_date(bday.value)]
            else:
                born += [self._tr.tr(TR('Date Unknown'), person.sex)]
            bplace = person.sub_tag_value("BIRT/PLAC")
            if bplace:
                born += [bplace]
            born = ', '.join(born)
            if born:
                attributes += [(self._tr.tr(TR('Born'), person.sex), born)]

            # maiden name
            if person.name.maiden:
                attributes += [(self._tr.tr(TR('Maiden name'), person.sex),
                                person.name.maiden)]

            # Parents
            if person.mother:
                attributes += [(self._tr.tr(TR('Mother'), person.mother.sex),
                                self._person_ref(person.mother))]
            if person.father:
                attributes += [(self._tr.tr(TR('Father'), person.father.sex),
                                self._person_ref(person.father))]

            # add some extra info
            indi_attr = indi_attributes(person)
            for tag in ['EDUC', 'OCCU', 'RESI', 'NMR', 'NCHI', 'TITL', 'DSCR',
                        'RELI', 'FACT']:
                for attrib in indi_attr:
                    if attrib.tag == tag:
                        attributes += [self._format_indi_attr(person, attrib)]

            # all families as spouse
            families = []
            own_kids = []
            fams = person.sub_tags("FAMS")
            for fam in fams:

                spouse = _spouse(person, fam)
                children = fam.sub_tags("CHIL")

                children_ids = [rec.xref_id for rec in children]
                _log.debug('spouse = %s; children ids = %s; children = %s',
                           spouse, children_ids, children)

                if spouse:
                    pfmt = '{person}: {ref}'
                    family = pfmt.format(person=self._tr.tr(TR('Spouse'),
                                                            spouse.sex),
                                         ref=self._person_ref(spouse))
                    kids = []
                    if children:
                        kids = [self._person_ref(c, c.name.first)
                                for c in children]
                        family += "; " + self._tr.tr(TR('kids')) + ': ' + \
                            ', '.join(kids)
                    families += [family]
                else:
                    own_kids += [self._person_ref(c, c.name.first)
                                 for c in children]
            if own_kids:
                family = self._tr.tr(TR('Kids')) + ': ' + ', '.join(own_kids)
                families += [family]

            # collect all events from person and families
            events = self._events(person)

            # Comments are published as set of paragraphs
            notes = []
            for note in person.sub_tags('NOTE'):
                notes += note.value.split('\n')

            # render whole person info
            self._render_person(person, image_data, attributes, families,
                                events, notes)

        # generate some stats
        if self._make_stat:
            section = self._tr.tr(TR("Statistics"))
            self._render_section(1, 'statistics', section)

            section = self._tr.tr(TR("Total Statistics"))
            self._render_section(2, 'total_statistics', section)

            nmales = len([person for person in indis if person.sex == 'M'])
            nfemales = len([person for person in indis if person.sex == 'F'])
            self._render_name_stat(len(indis), nfemales, nmales)

            section = self._tr.tr(TR("Name Statistics"))
            self._render_section(2, 'name_statistics', section)

            section = self._tr.tr(TR("Female Name Frequency"))
            self._render_section(3, 'female_name_freq', section)
            name_freq = self._name_freq(person for person in indis
                                        if person.sex == 'F')
            self._render_name_freq(name_freq)

            section = self._tr.tr(TR("Male Name Frequency"))
            self._render_section(3, 'male_name_freq', section)
            name_freq = self._name_freq(person for person in indis
                                        if person.sex == 'M')
            self._render_name_freq(name_freq)

        # add table of contents
        if self._make_toc:
            self._render_toc()

        # finish
        self._finalize()

[docs]    def _indi_sort_key(self, indi):
        """Return name ordering key for individual.

        Parameters
        ----------
        indi : `ged4py.model.Individual`
            INDI record representation.

        Returns
        -------
        order : `tuple` [ `str` ]
        """
        # make key from name, this is a tuple of unicode strings
        key = indi.name.order(self._sort_order)

        # we want locale-aware ordering
        key = tuple(locale.strxfrm(x) for x in key)

        return key

[docs]    def _events(self, person):
        """Returns a list of events for a given person.

        Returned list contains tuples (date, info).

        Parameters
        ----------
        person : `ged4py.model.Individual`
            INDI record representation.

        Returns
        -------
        events : `list` [ `tuple` ]
            List of tuples with two elements: date and event information.
        """
        # collect all events from person and families
        events = []
        for evt in indi_events(person):
            # BIRT was already rendered
            if evt.tag != 'BIRT':
                # for generic EVEN event, use TYPE as even name, we cannot
                # translate it because it can be anything
                if evt.tag == 'EVEN' and evt.type:
                    event = evt.type
                else:
                    event = self._tr.tr("EVENT." + evt.tag, person.sex)
                facts = [event,
                         evt.value,
                         evt.place,
                         evt.note]
                if evt.cause:
                    pfmt = self._tr.tr(TR("EVENT.CAUSE: {cause}"), person.sex)
                    facts.append(pfmt.format(cause=evt.cause))
                events += [(evt.date, facts)]

        for fam in person.sub_tags("FAMS"):

            spouse = _spouse(person, fam)

            for evt in family_events(fam):
                facts = [self._tr.tr("FAMEVT." + evt.tag)]
                if spouse:
                    note = '{spouse}: {ref}'.format(
                        spouse=self._tr.tr(TR('Spouse'), spouse.sex),
                        ref=self._person_ref(spouse))
                    facts += [note]
                facts += [evt.value,
                          evt.place,
                          evt.note]
                events += [(evt.date, facts)]

            for child in fam.sub_tags("CHIL"):
                for evt in indi_events(child, ['BIRT']):
                    pfmt = self._tr.tr(TR("CHILD.BORN {child}"),
                                       child.sex)
                    childRef = self._person_ref(child, child.name.first)
                    facts = [pfmt.format(child=childRef),
                             evt.value,
                             evt.place,
                             evt.note]
                    events += [(evt.date, facts)]

        def _date_key(event):
            "Return event date, used for comparison"
            date = event[0]
            if date is None:
                # use date in the future
                date = DateValue.parse(None)
            return date

        # order events (only those with dates)
        sevents = []
        for date, facts in sorted(events, key=_date_key):
            facts = [fact for fact in facts if fact]
            facts = "; ".join(facts)
            if date is None:
                if self._events_without_dates:
                    sevents += [(self._tr.tr(TR("Event Date Unknown")), facts)]
            else:
                sevents += [(self._tr.tr_date(date), facts)]

        return sevents

[docs]    def _make_main_image(self, person):
        """Returns image for a person.

        Parameters
        ----------
        person : `ged4py.model.Individual`
            INDI record representation.

        Returns
        -------
        image_data : `bytes` or ``None``
            Bytes of the image data or ``None``.
        """

        if not self._make_images:
            return None

        path = utils.person_image_file(person)
        if path:

            _log.debug('Found media file name %s', path)

            # find image file, try to open it
            imgfile = self._floc.open_image(path)
            if not imgfile:
                _log.warning('Failed to locate image file "%s"', path)
            else:
                _log.debug('Opened image file %s', path)
                imgdata = imgfile.read()
                return imgdata

        return None

[docs]    def _name_freq(self, people):
        """Returns name frequency table.

        Parameters
        ----------
        people : iterable of `ged4py.model.Individual`
            Sequence of INDI records.

        Returns
        -------
        table : `list` [ `tuple` ]
            List of (name, count) ordered by name.
        """
        namefreq = {}
        for person in people:
            namefreq.setdefault(person.name.first, 0)
            namefreq[person.name.first] += 1
        namefreq = [(key, val) for key, val in namefreq.items()]
        # sort ascending in name
        namefreq.sort()
        return namefreq

[docs]    def _format_indi_attr(self, person, attrib, prefix="ATTR."):
        """Formatting of the individual's attributes.

        Parameters
        ----------
        person : `ged4py.model.Individual`
            INDI record representation.
        attrib : `ged2doc.events.Event`
            Attribute structure.
        prefix : `str`, optional
            Prefix added to attribute tag before translation.

        Returns
        -------
        attribute : `tuple`
            Tuple (attribute, value).
        """

        # for generic FACT attribute, use TYPE as fact name, we cannot
        # translate it because it can be anything
        if attrib.tag == 'FACT' and attrib.type:
            attr = attrib.type
        else:
            attr = self._tr.tr(prefix + attrib.tag, person.sex)

        props = []
        if attrib.value:
            props.append(attrib.value)
        if attrib.date:
            props.append(self._tr.tr_date(attrib.date))
        if attrib.place:
            props.append(attrib.place)
        if attrib.note:
            props.append(attrib.note)
        props = ", ".join(props)
        return (attr, props)

[docs]    def _person_ref(self, person, name=None):
        """Returns encoded person reference.

        If person is None then None is returned. If name is not given then
        properly formatted person full name is used.

        Encoded reference consists of ASCII character SOH (\001) followed by
        reference ID, STX (\002), person name, and ETX (\003). This sequence
        will be embedded in the text and it should be interpreted later by
        subclasses to produce properly formatted reference in a backend-
        specific format.

        Parameters
        ----------
        person : `ged4py.model.Individual`
            INDI record representation.
        name : `str`, optional
            Name to use instead of person name.

        Returns
        -------
        person_ref : `str`
        """
        if person is None:
            return None
        if name is None:
            name = name_fmt(person.name, self._name_fmt)
        return utils.embed_ref(person.xref_id, name)

[docs]    @abc.abstractmethod
    def _render_prolog(self):
        """Generate initial document header/title.
        """
        raise NotImplementedError()

[docs]    @abc.abstractmethod
    def _render_section(self, level, ref_id, title, newpage=False):
        """Produces new section in the output document.

        This method should also save section reference so that TOC can be
        later produced when `_render_toc` method is called.

        Parameters
        ----------
        level : `int`
            Section level (1, 2, 3, etc.).
        ref_id : `str`
            Unique section identifier.
        title : `str`
            Printable section name.
        newpage : `bool`, optional
            If ``True`` then start new page (for documents that support
            pagination).
        """
        raise NotImplementedError()

[docs]    @abc.abstractmethod
    def _render_person(self, person, image_data, attributes, families,
                       events, notes):
        """Output person information.

        Parameters
        ----------
        person : `ged4py.model.Individual`
            INDI record representation.
        image_data : `bytes` or ``None``
            Either `None` or binary image data (typically content of JPEG
            image).
        attributes : `list` [ `tuple` ]
            List of (attr_name, text) tuples, may be empty.
        families : `list` [ `str` ]
            List of strings (possibly empty), each string contains description
            of one family and should be typically rendered as a separate
            paragraph.
        events : `list` [ `tuple` ]
            List of (date, text) tuples, may be empty. Date is properly
            formatted string and does not need any other formatting.
        notes : `list` [ `str` ]
            List of strings, each string should be rendered as separate
            paragraph.

        Notes
        -----
        Textual information in parameters to this method can include
        references to other persons (e.g. mother/father). Such references are
        embedded into text in encoded format determined by `_person_ref`
        method. It is responsibility of the subclasses to extract these
        references from text and re-encode them using proper backend
        representation.
        """
        raise NotImplementedError()

[docs]    @abc.abstractmethod
    def _render_name_stat(self, n_total, n_females, n_males):
        """Produces summary table.

        Sum of male and female counters can be lower than total count due to
        individuals with unknown/unspecified gender.

        Parameters
        ----------
        n_total : `int`
            Total number of individuals.
        n_females : `int`
            Number of female individuals.
        n_males : `int`
            Number of male individuals.
        """
        raise NotImplementedError()

[docs]    @abc.abstractmethod
    def _render_name_freq(self, freq_table):
        """Produces name statistics table.

        Parameters
        ----------
        freq_table : `list` [ `tuple` ]
            List of (name, count) tuples.
        """
        raise NotImplementedError()

[docs]    @abc.abstractmethod
    def _render_toc(self):
        """Produce table of contents using info collected in
        `_render_section()`.
        """
        raise NotImplementedError()

[docs]    @abc.abstractmethod
    def _finalize(self):
        """Finalize output.
        """
        raise NotImplementedError()