# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

from os.path import dirname, join, abspath

from sumy.nlp.tokenizers import Tokenizer
from sumy._compat import to_string, to_unicode
from sumy.models.dom import ObjectDocumentModel, Paragraph, Sentence

# Module-level tokenizer shared by all sentence-building helpers below.
_TOKENIZER = Tokenizer("czech")


def expand_resource_path(path):
    # Resolve `path` relative to the "data" directory next to this module.
    return join(abspath(dirname(__file__)), to_string("data"), to_string(path))


def load_resource(path):
    # Read a resource file from the "data" directory and return it as unicode.
    path = expand_resource_path(path)
    with open(path, "rb") as file:
        return to_unicode(file.read())


def build_document(*sets_of_sentences):
    # Build an ObjectDocumentModel from one iterable of sentence strings
    # per paragraph.
    paragraphs = []
    for sentences in sets_of_sentences:
        sentence_instances = []
        for sentence_as_string in sentences:
            sentence = build_sentence(sentence_as_string)
            sentence_instances.append(sentence)
        paragraphs.append(Paragraph(sentence_instances))

    return ObjectDocumentModel(paragraphs)


def build_document_from_string(string):
    # Parse a simple markup: lines starting with "# " become headings,
    # blank lines separate paragraphs, everything else is a plain sentence.
    sentences = []
    paragraphs = []

    for line in string.strip().splitlines():
        line = line.lstrip()
        if line.startswith("# "):
            sentences.append(build_sentence(line[2:], is_heading=True))
        elif not line:
            paragraphs.append(Paragraph(sentences))
            sentences = []
        else:
            sentences.append(build_sentence(line))

    paragraphs.append(Paragraph(sentences))

    return ObjectDocumentModel(paragraphs)


def build_sentence(sentence_as_string, is_heading=False):
    return Sentence(sentence_as_string, _TOKENIZER, is_heading)
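

# Minimal usage sketch (an illustrative addition, not part of the original
# module): builds a two-paragraph document from the markup accepted by
# build_document_from_string above ("# " marks a heading, a blank line
# starts a new paragraph). Assumes the NLTK tokenizer data required by
# sumy's Tokenizer is installed locally.
if __name__ == "__main__":
    document = build_document_from_string(
        "# Heading\n"
        "First sentence of the first paragraph.\n"
        "Second sentence of the first paragraph.\n"
        "\n"
        "Only sentence of the second paragraph.\n"
    )
    for paragraph in document.paragraphs:
        for sentence in paragraph.sentences:
            print(sentence)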