# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ParaScore: a reference-based metric for evaluating paraphrase generation."""

import datasets
import evaluate
import nltk


_CITATION = """\
@article{Shen2022,
    archivePrefix = {arXiv},
    arxivId = {2202.08479},
    author = {Shen, Lingfeng and Liu, Lemao and Jiang, Haiyun and Shi, Shuming},
    journal = {EMNLP 2022 - 2022 Conference on Empirical Methods in Natural Language Processing, Proceedings},
    eprint = {2202.08479},
    month = {feb},
    number = {1},
    pages = {3178--3190},
    title = {{On the Evaluation Metrics for Paraphrase Generation}},
    url = {http://arxiv.org/abs/2202.08479},
    year = {2022}
}
"""

_DESCRIPTION = """\
ParaScore is a metric for scoring the quality of generated paraphrases. It combines a
semantic-similarity score between each prediction and its reference with a lexical-diversity
bonus derived from the normalized edit distance between them.
"""

_KWARGS_DESCRIPTION = """
Computes ParaScore for a batch of generated paraphrases.
Args:
    predictions: list of predictions to score. Each prediction should be a string with tokens separated by spaces.
    references: list of references, one per prediction. Each reference should be a string with tokens separated by spaces.
    model_type: name of the sentence-transformers model used for the semantic-similarity component.
    lang: language of the inputs; use 'zh' for Chinese, 'en' (default) otherwise.
Returns:
    score: list of ParaScore values, one per prediction.
Examples:
    >>> metric = evaluate.load("transZ/test_parascore")
    >>> results = metric.compute(references=["They work for 6 months"], predictions=["They have been working for 6 months"])
    >>> print(results)
    {'score': [0.85]}
"""

# External resource: the ParaScore toolkit this module is based on.
_TOOLKIT_URL = "https://github.com/shadowkiller33/parascore_toolkit"


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class test_parascore(evaluate.Metric):
    """ParaScore is a metric for scoring the quality of generated paraphrases."""

    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference.
            features=[
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
                    }
                ),
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Value("string", id="sequence"),
                    }
                ),
            ],
            # Homepage of the module for documentation
            homepage="https://github.com/shadowkiller33/ParaScore",
            # Additional links to the codebase or references
            codebase_urls=["https://github.com/shadowkiller33/ParaScore"],
            reference_urls=["https://github.com/shadowkiller33/ParaScore"],
        )

    def _download_and_prepare(self, dl_manager):
        """Download external resources needed to compute the scores."""
        # The semantic-similarity component is delegated to the sbert_cosine metric.
        self.sbert_cosine = evaluate.load('transZ/sbert_cosine')

    def _edit(self, x, y, lang='en'):
        """Return the edit distance between x and y, normalized by the longer length."""
        if lang == 'zh':
            # Chinese text is compared at the character level, so drop the spaces.
            x = x.replace(" ", "")
            y = y.replace(" ", "")
        a = len(x)
        b = len(y)
        dis = nltk.edit_distance(x, y)
        return dis / max(a, b)

    def _diverse(self, cands, sources, lang='en'):
        """Map each candidate/source pair to a lexical-diversity reward in [-1, thresh]."""
        diversity = []
        thresh = 0.35
        for x, y in zip(cands, sources):
            div = self._edit(x, y, lang)
            if div >= thresh:
                # Sufficiently different from the source: cap the reward at thresh.
                ss = thresh
            else:
                # Interpolate linearly from -1 (identical strings) up to thresh.
                ss = -1 + ((thresh + 1) / thresh) * div
            diversity.append(ss)
        return diversity

    def _compute(
        self,
        predictions,
        references,
        model_type='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
        lang='en',
    ):
        """Return the ParaScore for each prediction."""
        # Semantic similarity between each prediction and its reference.
        score = self.sbert_cosine.compute(predictions=predictions, references=references, model_type=model_type)
        sbert_score = [round(v, 2) for v in score['score']]
        # Lexical-diversity reward, weighted by 0.05.
        diversity = self._diverse(predictions, references, lang)
        score = [s + 0.05 * d for s, d in zip(sbert_score, diversity)]

        return {
            "score": score,
        }