#!/usr/bin/env python3
# -*- coding: utf-8 -*- vim60:fdm=marker
#
# Copyright: 2016, Maximiliano Curia <maxy@debian.org>
#
# License: ISC
#  Permission to use, copy, modify, and/or distribute this software for any
#  purpose with or without fee is hereby granted, provided that the above
#  copyright notice and this permission notice appear in all copies.
#  .
#  THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
#  REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
#  AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
#  INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
#  LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
#  OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
#  PERFORMANCE OF THIS SOFTWARE.

''' Miscellaneous data types '''

import re

from collections import namedtuple

# Pairs a compiled regular expression (`re`) with the replacement string
# (`repl`) that is passed to its `sub()` method.
ReSub = namedtuple('ReSub', ('re', 'repl'))
# NOTE(review): presumably placeholder license names for content whose license
# could not be identified; they are not used in this file -- confirm with the
# callers.
UNKNOWN = 'Unknown'
UNKNOWN_COPYRIGHTED = 'UnknownCopyrighted'


class License(object):
    ''' A license identified by name, optionally backed by a stored entry.

    Instances obtained through `License.get` are cached per name in the
    class-level `licenses` dictionary.
    '''

    # Cache of name -> License shared by the whole class; filled by `get`.
    licenses = {}

    def __init__(self, name):
        ''' Create a license called `name` with no stored entry attached. '''
        self.name = name
        self.stored = None

    def __str__(self):
        ''' Render the license as text.

        Without a stored entry a stub paragraph asking for the license text
        is produced; otherwise the stored entry's `dump()` output is returned
        with trailing newlines removed.
        '''
        if not self.stored:
            template = ('License: {name}\n'
                        'Comment: Add the corresponding license text here')
            return template.format(name=self.name)
        dumped = self.stored.dump()
        return dumped.rstrip('\n')

    @staticmethod
    def get(name):
        ''' Return the cached License for `name`, creating it on first use. '''
        registry = License.licenses
        try:
            return registry[name]
        except KeyError:
            created = License(name)
            registry[name] = created
            return created


class YearRange(object):
    ''' Inclusive range of years.

    A bound equal to 0 means "not set"; a range with both bounds at 0 is
    empty and renders as the empty string.
    '''

    def __init__(self, low=0, high=0):
        ''' Build a range from up to two years, given in any order.

        Values are converted with `int()`.  A value of 0 is ignored, so
        passing a single year yields a one-year range instead of a range
        with an unset lower bound.
        '''
        self.low = 0
        self.high = 0
        # Reusing add() keeps the "0 means unset" rule in a single place and
        # orders the bounds regardless of the argument order.
        self.add(low)
        self.add(high)

    def __contains__(self, year):
        ''' Support `year in year_range`; `year` is converted with `int()`. '''
        value = int(year)
        return self.low <= value <= self.high

    # Historical (non-protocol) spelling, kept so explicit callers still work.
    __in__ = __contains__

    def add(self, year):
        ''' Widen the range so that it includes `year`.

        A `year` of 0 is ignored.  Always returns `self` so that calls can be
        chained.
        '''
        value = int(year)
        if value:
            if not self.low or value < self.low:
                self.low = value
            if not self.high or self.high < value:
                self.high = value
        return self

    def newer(self, other):
        ''' Return True if `other` ends later than this range.

        When this range has no upper bound, any `other` with an upper bound
        counts as newer.
        '''
        if self.high and other.high:
            return other.high > self.high
        return bool(not self.high and other.high)

    def merge(self, other):
        ''' Widen this range to cover `other` as well; returns `self`. '''
        self.add(other.low)
        self.add(other.high)
        return self

    def __str__(self):
        ''' Render as '', 'YYYY' or 'YYYY-YYYY'. '''
        if not self.low:
            return ''
        if self.low == self.high:
            return str(self.low)
        return str(self.low) + '-' + str(self.high)


class CopyrightHolder(object):
    ''' A copyright holder: a name, an optional email and the years covered.

    `from_copyright` parses a free-form copyright statement into an instance;
    `get_name_email` splits the holder part into a name and an email.
    '''

    # A year of two or more digits, optionally followed by a range separator
    # and a second (possibly abbreviated) year, plus surrounding punctuation.
    year_re = re.compile(r'\s*(?:[\s:([]*)?(?P<lo>\d{2,})[]:\s]*'
                         r'(?:[-~=–—][\s:[]*(?P<hi>\d{1,})[]:\s]*)?[,/)]*')
    # The holder text: an optional leading "by" is skipped and trailing
    # whitespace/punctuation is left out of the `holder` group.
    holder_re = re.compile(r'\s*(?:by\s*)?(?P<holder>\S.*?\S)[\s"\*,;/]*$', re.I)

    # Optional name followed by an optional, possibly bracketed, email.
    name_email_re = re.compile(
        r'(?P<name>\S.*?\S)?(?(name)(?:\s|(?=[<(])|$)|)\s*'
        r'[<(/\\]*(?P<email>[^\s<>]+?@[^\s<>@]+?)?(?(email)[)<>/\\]*|)$')

    # Substitutions applied, in order, to the holder text before it is split
    # into name and email.
    email_subs = [
        ReSub(re.compile(r'</?tt>'), r''),
        ReSub(re.compile(r'%20'), r' '),
        ReSub(re.compile(r'&lt;?'), r'<'),
        ReSub(re.compile(r'&gt;?'), r'>'),
        ReSub(re.compile(r'&#x40;'), r'@'),
        ReSub(re.compile(r'&ldquo;?'), r'"'),
        ReSub(re.compile(r'\(c\)$', re.I), r''),
        # Expensive fix for ") at the end of the string
        ReSub(re.compile(r'((?P<paren>\()?(?(paren).*?|))(?(paren)|\)+)?$'), r'\1'),
        ReSub(re.compile(r'\s+\(?(where|at|@)\)?\s+', re.I), r'@'),
        ReSub(re.compile(r'\(at\)', re.I), r'@'),
        ReSub(re.compile(r'\s+\(?do?[tm]\)?\s+', re.I), r'.'),
        # Ugly fix for >mail@example.com<
        ReSub(re.compile(r'(?:^|(?<=\s))\s*\>\s*(?=\w(?:\w|[.-])*@)'), r'<'),
        ReSub(re.compile(r'\<\s*$'), r'>'),
        # NOTE(review): once the two literals below are joined, '@' is
        # followed by '?:', which the regex engine reads as an optional '@'
        # and a literal ':'.  This looks like a typo for '@(?:' (wrapping a
        # bare email in angle brackets), but correcting it changes the result
        # for bare-email input, so the pattern is left untouched -- confirm
        # the intent before changing it.
        ReSub(re.compile(r'(?:^|(?<=\s))\s*((?!\<)\w(?:\w|[.-])*@'
                         r'?:\w(?:\w|-)+(?:\.\w(?:\w|-)+)+(?<!>))\s*(?:(?=\s)|$)'),
              r'<\1>'),
    ]

    # Substitutions applied, in order, to the extracted name.
    name_cruft_subs = [
        ReSub(re.compile(r'</item>', re.IGNORECASE), r''),
        ReSub(re.compile(r'^>', re.IGNORECASE), r''),
        ReSub(re.compile(r'<$', re.IGNORECASE), r''),
        ReSub(re.compile(r'\\[nt]$', re.IGNORECASE), r''),
        ReSub(re.compile(r'^\(\s*c\s*\)\s*', re.IGNORECASE), r''),
    ]

    def __init__(self, name, email, years):
        ''' Store the holder's `name`, `email` and `years` as given. '''
        self.name = name
        self.email = email
        self.years = years

    def merge(self, other):
        ''' Fold `other` into this holder and return `self`.

        The name is replaced by `other.name` when that name is non-empty and
        `other`'s years are newer; the years are always merged.  The email is
        left unchanged.
        '''
        if other.name and self.years.newer(other.years):
            self.name = other.name
        self.years.merge(other.years)
        return self

    @property
    def person(self):
        ''' 'Name <email>', or whichever of the two parts is available. '''
        # A missing name (None) is treated like an empty one so that an
        # email-only holder can still be rendered.
        result = self.name or ''
        if self.name and self.email:
            result += ' '
        if self.email:
            result += '<{}>'.format(self.email)
        return result

    def __str__(self):
        ''' 'YEARS, PERSON', or just the person when no years are known. '''
        result = str(self.years)
        result += ', ' if result else ''
        result += self.person
        return result

    def __repr__(self):
        return str(self)

    @staticmethod
    def _get_year(text):
        ''' Convert year text to an int, expanding two-digit years.

        Values below 50 become 20xx and values from 50 to 99 become 19xx.
        '''
        year = int(text)
        if year < 50:
            year += 2000
        if year < 100:
            year += 1900
        return year

    @staticmethod
    def _expand_high_year(lo_text, hi_text, low):
        ''' Resolve the upper year of a range, which may be abbreviated.

        When `hi_text` has fewer digits than `lo_text`, the missing leading
        digits are borrowed from `lo_text` ('2001-4' -> 2004, '1995-7' ->
        1997).  If that would place the upper year before `low` (as in
        '1998-02'), or when nothing is abbreviated, `hi_text` is converted
        on its own with `_get_year`.
        '''
        if len(hi_text) < len(lo_text):
            borrowed = lo_text[:-len(hi_text)] + hi_text
            candidate = CopyrightHolder._get_year(borrowed)
            if candidate >= low:
                return candidate
        return CopyrightHolder._get_year(hi_text)

    @staticmethod
    def get_name_email(text):
        ''' Split holder text into a `(name, email)` tuple.

        Returns `(None, None)` when the text does not match `name_email_re`.
        Otherwise the name is a (possibly empty) string and the email is a
        string or None.
        '''
        # De-cruft email
        for sub in CopyrightHolder.email_subs:
            text = sub.re.sub(sub.repl, text)

        match = CopyrightHolder.name_email_re.match(text)
        if not match:
            return None, None
        match_dict = match.groupdict()
        name = match_dict.get('name', '')
        if name is None:
            name = ''
        # Trim stray punctuation and whitespace around the name.
        name = name.strip(r''',.;*'"@-–—[]{}    ''')
        for sub in CopyrightHolder.name_cruft_subs:
            name = sub.re.sub(sub.repl, name)
        email = match_dict.get('email', None)
        return name, email

    @staticmethod
    def from_copyright(copyright_):
        ''' Parse a copyright statement into a CopyrightHolder.

        The first run of consecutive year expressions is collected into a
        YearRange and removed from the text; the remainder is parsed as the
        holder.  Returns None when no holder text is found or when neither a
        name nor an email can be extracted.
        '''

        def get_years(text, years):
            ''' Add the years found in `text` to `years`.

            Returns the `(start, end)` span covered by the year expressions;
            `start >= end` means that no year was found.
            '''
            start = len(text)
            end = 0
            # The first expression may appear anywhere; each following one
            # has to begin exactly where the previous one ended.
            year_match = CopyrightHolder.year_re.search(text)
            while year_match:
                lo_text = year_match.group('lo')
                hi_text = year_match.group('hi')
                low = CopyrightHolder._get_year(lo_text)
                years.add(low)
                if hi_text:
                    years.add(CopyrightHolder._expand_high_year(
                        lo_text, hi_text, low))
                start = min(start, year_match.start(0))
                end = year_match.end(0)
                year_match = CopyrightHolder.year_re.match(text, end)
            return start, end

        years = YearRange()
        start, end = get_years(copyright_, years)
        if start < end:
            # Drop the year expressions so that only the holder text is left.
            copyright_ = copyright_[:start] + copyright_[end:]

        match = CopyrightHolder.holder_re.match(copyright_)
        if not match:
            return None
        holder = match.group('holder')
        name, email = CopyrightHolder.get_name_email(holder)
        if not name and not email:
            return None
        return CopyrightHolder(name, email, years)
