ted commited on
Commit
5d96d01
1 Parent(s): 6a12783

[feat] Add new codebleu

Browse files
app.py CHANGED
@@ -2,5 +2,5 @@ import evaluate
2
  from evaluate.utils import launch_gradio_widget
3
 
4
 
5
- module = evaluate.load("vichyt/codebleu")
6
  launch_gradio_widget(module)
 
2
  from evaluate.utils import launch_gradio_widget
3
 
4
 
5
+ module = evaluate.load("vichyt/metric-codebleu")
6
  launch_gradio_widget(module)
eval/__init__.py DELETED
@@ -1 +0,0 @@
1
- import code_bleu
 
 
eval/bleu.py DELETED
@@ -1,590 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # Natural Language Toolkit: BLEU Score
3
- #
4
- # Copyright (C) 2001-2020 NLTK Project
5
- # Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
6
- # Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
7
- # URL: <http://nltk.org/>
8
- # For license information, see LICENSE.TXT
9
-
10
- """BLEU score implementation."""
11
-
12
- import math
13
- import sys
14
- from fractions import Fraction
15
- import warnings
16
- from collections import Counter
17
-
18
- from utils import ngrams
19
- import pdb
20
-
21
-
22
- def sentence_bleu(
23
- references,
24
- hypothesis,
25
- weights=(0.25, 0.25, 0.25, 0.25),
26
- smoothing_function=None,
27
- auto_reweigh=False,
28
- ):
29
- """
30
- Calculate BLEU score (Bilingual Evaluation Understudy) from
31
- Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
32
- "BLEU: a method for automatic evaluation of machine translation."
33
- In Proceedings of ACL. http://www.aclweb.org/anthology/P02-1040.pdf
34
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
35
- ... 'ensures', 'that', 'the', 'military', 'always',
36
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
37
- >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
38
- ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
39
- ... 'that', 'party', 'direct']
40
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
41
- ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
42
- ... 'heed', 'Party', 'commands']
43
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
44
- ... 'guarantees', 'the', 'military', 'forces', 'always',
45
- ... 'being', 'under', 'the', 'command', 'of', 'the',
46
- ... 'Party']
47
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
48
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
49
- ... 'of', 'the', 'party']
50
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
51
- 0.5045...
52
- If there is no ngrams overlap for any order of n-grams, BLEU returns the
53
- value 0. This is because the precision for the order of n-grams without
54
- overlap is 0, and the geometric mean in the final BLEU score computation
55
- multiplies the 0 with the precision of other n-grams. This results in 0
56
- (independently of the precision of the othe n-gram orders). The following
57
- example has zero 3-gram and 4-gram overlaps:
58
- >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
59
- 0.0
60
- To avoid this harsh behaviour when no ngram overlaps are found a smoothing
61
- function can be used.
62
- >>> chencherry = SmoothingFunction()
63
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
64
- ... smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
65
- 0.0370...
66
- The default BLEU calculates a score for up to 4-grams using uniform
67
- weights (this is called BLEU-4). To evaluate your translations with
68
- higher/lower order ngrams, use customized weights. E.g. when accounting
69
- for up to 5-grams with uniform weights (this is called BLEU-5) use:
70
- >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
71
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
72
- 0.3920...
73
- :param references: reference sentences
74
- :type references: list(list(str))
75
- :param hypothesis: a hypothesis sentence
76
- :type hypothesis: list(str)
77
- :param weights: weights for unigrams, bigrams, trigrams and so on
78
- :type weights: list(float)
79
- :param smoothing_function:
80
- :type smoothing_function: SmoothingFunction
81
- :param auto_reweigh: Option to re-normalize the weights uniformly.
82
- :type auto_reweigh: bool
83
- :return: The sentence-level BLEU score.
84
- :rtype: float
85
- """
86
- return corpus_bleu(
87
- [references], [hypothesis], weights, smoothing_function, auto_reweigh
88
- )
89
-
90
-
91
- def corpus_bleu(
92
- list_of_references,
93
- hypotheses,
94
- weights=(0.25, 0.25, 0.25, 0.25),
95
- smoothing_function=None,
96
- auto_reweigh=False,
97
- ):
98
- """
99
- Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
100
- the hypotheses and their respective references.
101
- Instead of averaging the sentence level BLEU scores (i.e. marco-average
102
- precision), the original BLEU metric (Papineni et al. 2002) accounts for
103
- the micro-average precision (i.e. summing the numerators and denominators
104
- for each hypothesis-reference(s) pairs before the division).
105
- >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
106
- ... 'ensures', 'that', 'the', 'military', 'always',
107
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
108
- >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
109
- ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
110
- ... 'heed', 'Party', 'commands']
111
- >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
112
- ... 'guarantees', 'the', 'military', 'forces', 'always',
113
- ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party']
114
- >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
115
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
116
- ... 'of', 'the', 'party']
117
- >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
118
- ... 'interested', 'in', 'world', 'history']
119
- >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
120
- ... 'because', 'he', 'read', 'the', 'book']
121
- >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
122
- >>> hypotheses = [hyp1, hyp2]
123
- >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
124
- 0.5920...
125
- The example below show that corpus_bleu() is different from averaging
126
- sentence_bleu() for hypotheses
127
- >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
128
- >>> score2 = sentence_bleu([ref2a], hyp2)
129
- >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
130
- 0.6223...
131
- :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
132
- :type list_of_references: list(list(list(str)))
133
- :param hypotheses: a list of hypothesis sentences
134
- :type hypotheses: list(list(str))
135
- :param weights: weights for unigrams, bigrams, trigrams and so on
136
- :type weights: list(float)
137
- :param smoothing_function:
138
- :type smoothing_function: SmoothingFunction
139
- :param auto_reweigh: Option to re-normalize the weights uniformly.
140
- :type auto_reweigh: bool
141
- :return: The corpus-level BLEU score.
142
- :rtype: float
143
- """
144
- # Before proceeding to compute BLEU, perform sanity checks.
145
-
146
- p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
147
- p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
148
- hyp_lengths, ref_lengths = 0, 0
149
-
150
- assert len(list_of_references) == len(hypotheses), (
151
- "The number of hypotheses and their reference(s) should be the " "same "
152
- )
153
-
154
- # Iterate through each hypothesis and their corresponding references.
155
- for references, hypothesis in zip(list_of_references, hypotheses):
156
- # For each order of ngram, calculate the numerator and
157
- # denominator for the corpus-level modified precision.
158
- for i, _ in enumerate(weights, start=1):
159
- p_i = modified_precision(references, hypothesis, i)
160
- p_numerators[i] += p_i.numerator
161
- p_denominators[i] += p_i.denominator
162
-
163
- # Calculate the hypothesis length and the closest reference length.
164
- # Adds them to the corpus-level hypothesis and reference counts.
165
- hyp_len = len(hypothesis)
166
- hyp_lengths += hyp_len
167
- ref_lengths += closest_ref_length(references, hyp_len)
168
-
169
- # Calculate corpus-level brevity penalty.
170
- bp = brevity_penalty(ref_lengths, hyp_lengths)
171
-
172
- # Uniformly re-weighting based on maximum hypothesis lengths if largest
173
- # order of n-grams < 4 and weights is set at default.
174
- if auto_reweigh:
175
- if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25):
176
- weights = (1 / hyp_lengths,) * hyp_lengths
177
-
178
- # Collects the various precision values for the different ngram orders.
179
- p_n = [
180
- Fraction(p_numerators[i], p_denominators[i], _normalize=False)
181
- for i, _ in enumerate(weights, start=1)
182
- ]
183
-
184
- # Returns 0 if there's no matching n-grams
185
- # We only need to check for p_numerators[1] == 0, since if there's
186
- # no unigrams, there won't be any higher order ngrams.
187
- if p_numerators[1] == 0:
188
- return 0
189
-
190
- # If there's no smoothing, set use method0 from SmoothinFunction class.
191
- if not smoothing_function:
192
- smoothing_function = SmoothingFunction().method1
193
- # Smoothen the modified precision.
194
- # Note: smoothing_function() may convert values into floats;
195
- # it tries to retain the Fraction object as much as the
196
- # smoothing method allows.
197
- p_n = smoothing_function(
198
- p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
199
- )
200
- s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, p_n))
201
- s = bp * math.exp(math.fsum(s))
202
- return s
203
-
204
-
205
- def modified_precision(references, hypothesis, n):
206
- """
207
- Calculate modified ngram precision.
208
- The normal precision method may lead to some wrong translations with
209
- high-precision, e.g., the translation, in which a word of reference
210
- repeats several times, has very high precision.
211
- This function only returns the Fraction object that contains the numerator
212
- and denominator necessary to calculate the corpus-level precision.
213
- To calculate the modified precision for a single pair of hypothesis and
214
- references, cast the Fraction object into a float.
215
- The famous "the the the ... " example shows that you can get BLEU precision
216
- by duplicating high frequency words.
217
- >>> reference1 = 'the cat is on the mat'.split()
218
- >>> reference2 = 'there is a cat on the mat'.split()
219
- >>> hypothesis1 = 'the the the the the the the'.split()
220
- >>> references = [reference1, reference2]
221
- >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
222
- 0.2857...
223
- In the modified n-gram precision, a reference word will be considered
224
- exhausted after a matching hypothesis word is identified, e.g.
225
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
226
- ... 'ensures', 'that', 'the', 'military', 'will',
227
- ... 'forever', 'heed', 'Party', 'commands']
228
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
229
- ... 'guarantees', 'the', 'military', 'forces', 'always',
230
- ... 'being', 'under', 'the', 'command', 'of', 'the',
231
- ... 'Party']
232
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
233
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
234
- ... 'of', 'the', 'party']
235
- >>> hypothesis = 'of the'.split()
236
- >>> references = [reference1, reference2, reference3]
237
- >>> float(modified_precision(references, hypothesis, n=1))
238
- 1.0
239
- >>> float(modified_precision(references, hypothesis, n=2))
240
- 1.0
241
- An example of a normal machine translation hypothesis:
242
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
243
- ... 'ensures', 'that', 'the', 'military', 'always',
244
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
245
- >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
246
- ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
247
- ... 'that', 'party', 'direct']
248
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
249
- ... 'ensures', 'that', 'the', 'military', 'will',
250
- ... 'forever', 'heed', 'Party', 'commands']
251
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
252
- ... 'guarantees', 'the', 'military', 'forces', 'always',
253
- ... 'being', 'under', 'the', 'command', 'of', 'the',
254
- ... 'Party']
255
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
256
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
257
- ... 'of', 'the', 'party']
258
- >>> references = [reference1, reference2, reference3]
259
- >>> float(modified_precision(references, hypothesis1, n=1)) # doctest: +ELLIPSIS
260
- 0.9444...
261
- >>> float(modified_precision(references, hypothesis2, n=1)) # doctest: +ELLIPSIS
262
- 0.5714...
263
- >>> float(modified_precision(references, hypothesis1, n=2)) # doctest: +ELLIPSIS
264
- 0.5882352941176471
265
- >>> float(modified_precision(references, hypothesis2, n=2)) # doctest: +ELLIPSIS
266
- 0.07692...
267
- :param references: A list of reference translations.
268
- :type references: list(list(str))
269
- :param hypothesis: A hypothesis translation.
270
- :type hypothesis: list(str)
271
- :param n: The ngram order.
272
- :type n: int
273
- :return: BLEU's modified precision for the nth order ngram.
274
- :rtype: Fraction
275
- """
276
- # Extracts all ngrams in hypothesis
277
- # Set an empty Counter if hypothesis is empty.
278
-
279
- counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
280
- # Extract a union of references' counts.
281
- # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
282
- max_counts = {}
283
- for reference in references:
284
- reference_counts = (
285
- Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
286
- )
287
- for ngram in counts:
288
- max_counts[ngram] = max(max_counts.get(ngram, 0), reference_counts[ngram])
289
-
290
- # Assigns the intersection between hypothesis and references' counts.
291
- clipped_counts = {
292
- ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
293
- }
294
-
295
- numerator = sum(clipped_counts.values())
296
- # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
297
- # Usually this happens when the ngram order is > len(reference).
298
- denominator = max(1, sum(counts.values()))
299
-
300
- return Fraction(numerator, denominator, _normalize=False)
301
-
302
-
303
- def closest_ref_length(references, hyp_len):
304
- """
305
- This function finds the reference that is the closest length to the
306
- hypothesis. The closest reference length is referred to as *r* variable
307
- from the brevity penalty formula in Papineni et. al. (2002)
308
- :param references: A list of reference translations.
309
- :type references: list(list(str))
310
- :param hyp_len: The length of the hypothesis.
311
- :type hyp_len: int
312
- :return: The length of the reference that's closest to the hypothesis.
313
- :rtype: int
314
- """
315
- ref_lens = (len(reference) for reference in references)
316
- closest_ref_len = min(
317
- ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
318
- )
319
- return closest_ref_len
320
-
321
-
322
- def brevity_penalty(closest_ref_len, hyp_len):
323
- """
324
- Calculate brevity penalty.
325
- As the modified n-gram precision still has the problem from the short
326
- length sentence, brevity penalty is used to modify the overall BLEU
327
- score according to length.
328
- An example from the paper. There are three references with length 12, 15
329
- and 17. And a concise hypothesis of the length 12. The brevity penalty is 1.
330
- >>> reference1 = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
331
- >>> reference2 = list('aaaaaaaaaaaaaaa') # i.e. ['a'] * 15
332
- >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
333
- >>> hypothesis = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
334
- >>> references = [reference1, reference2, reference3]
335
- >>> hyp_len = len(hypothesis)
336
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
337
- >>> brevity_penalty(closest_ref_len, hyp_len)
338
- 1.0
339
- In case a hypothesis translation is shorter than the references, penalty is
340
- applied.
341
- >>> references = [['a'] * 28, ['a'] * 28]
342
- >>> hypothesis = ['a'] * 12
343
- >>> hyp_len = len(hypothesis)
344
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
345
- >>> brevity_penalty(closest_ref_len, hyp_len)
346
- 0.2635971381157267
347
- The length of the closest reference is used to compute the penalty. If the
348
- length of a hypothesis is 12, and the reference lengths are 13 and 2, the
349
- penalty is applied because the hypothesis length (12) is less then the
350
- closest reference length (13).
351
- >>> references = [['a'] * 13, ['a'] * 2]
352
- >>> hypothesis = ['a'] * 12
353
- >>> hyp_len = len(hypothesis)
354
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
355
- >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
356
- 0.9200...
357
- The brevity penalty doesn't depend on reference order. More importantly,
358
- when two reference sentences are at the same distance, the shortest
359
- reference sentence length is used.
360
- >>> references = [['a'] * 13, ['a'] * 11]
361
- >>> hypothesis = ['a'] * 12
362
- >>> hyp_len = len(hypothesis)
363
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
364
- >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
365
- >>> hyp_len = len(hypothesis)
366
- >>> closest_ref_len = closest_ref_length(reversed(references), hyp_len)
367
- >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
368
- >>> bp1 == bp2 == 1
369
- True
370
- A test example from mteval-v13a.pl (starting from the line 705):
371
- >>> references = [['a'] * 11, ['a'] * 8]
372
- >>> hypothesis = ['a'] * 7
373
- >>> hyp_len = len(hypothesis)
374
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
375
- >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
376
- 0.8668...
377
- >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
378
- >>> hypothesis = ['a'] * 7
379
- >>> hyp_len = len(hypothesis)
380
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
381
- >>> brevity_penalty(closest_ref_len, hyp_len)
382
- 1.0
383
- :param hyp_len: The length of the hypothesis for a single sentence OR the
384
- sum of all the hypotheses' lengths for a corpus
385
- :type hyp_len: int
386
- :param closest_ref_len: The length of the closest reference for a single
387
- hypothesis OR the sum of all the closest references for every hypotheses.
388
- :type closest_ref_len: int
389
- :return: BLEU's brevity penalty.
390
- :rtype: float
391
- """
392
- if hyp_len > closest_ref_len:
393
- return 1
394
- # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
395
- elif hyp_len == 0:
396
- return 0
397
- else:
398
- return math.exp(1 - closest_ref_len / hyp_len)
399
-
400
-
401
- class SmoothingFunction:
402
- """
403
- This is an implementation of the smoothing techniques
404
- for segment-level BLEU scores that was presented in
405
- Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
406
- Smoothing Techniques for Sentence-Level BLEU. In WMT14.
407
- http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
408
- """
409
-
410
- def __init__(self, epsilon=0.1, alpha=5, k=5):
411
- """
412
- This will initialize the parameters required for the various smoothing
413
- techniques, the default values are set to the numbers used in the
414
- experiments from Chen and Cherry (2014).
415
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
416
- ... 'that', 'the', 'military', 'always', 'obeys', 'the',
417
- ... 'commands', 'of', 'the', 'party']
418
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
419
- ... 'that', 'the', 'military', 'will', 'forever', 'heed',
420
- ... 'Party', 'commands']
421
- >>> chencherry = SmoothingFunction()
422
- >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
423
- 0.4118...
424
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
425
- 0.4118...
426
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
427
- 0.4118...
428
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
429
- 0.4489...
430
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
431
- 0.4118...
432
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
433
- 0.4118...
434
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
435
- 0.4905...
436
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
437
- 0.4135...
438
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
439
- 0.4905...
440
- :param epsilon: the epsilon value use in method 1
441
- :type epsilon: float
442
- :param alpha: the alpha value use in method 6
443
- :type alpha: int
444
- :param k: the k value use in method 4
445
- :type k: int
446
- """
447
- self.epsilon = epsilon
448
- self.alpha = alpha
449
- self.k = k
450
-
451
- def method0(self, p_n, *args, **kwargs):
452
- """
453
- No smoothing.
454
- """
455
- p_n_new = []
456
- for i, p_i in enumerate(p_n):
457
- if p_i.numerator != 0:
458
- p_n_new.append(p_i)
459
- else:
460
- _msg = str(
461
- "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
462
- "Therefore the BLEU score evaluates to 0, independently of\n"
463
- "how many N-gram overlaps of lower order it contains.\n"
464
- "Consider using lower n-gram order or use "
465
- "SmoothingFunction()"
466
- ).format(i + 1)
467
- warnings.warn(_msg)
468
- # When numerator==0 where denonminator==0 or !=0, the result
469
- # for the precision score should be equal to 0 or undefined.
470
- # Due to BLEU geometric mean computation in logarithm space,
471
- # we we need to take the return sys.float_info.min such that
472
- # math.log(sys.float_info.min) returns a 0 precision score.
473
- p_n_new.append(sys.float_info.min)
474
- return p_n_new
475
-
476
- def method1(self, p_n, *args, **kwargs):
477
- """
478
- Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
479
- """
480
- return [
481
- (p_i.numerator + self.epsilon) / p_i.denominator
482
- if p_i.numerator == 0
483
- else p_i
484
- for p_i in p_n
485
- ]
486
-
487
- def method2(self, p_n, *args, **kwargs):
488
- """
489
- Smoothing method 2: Add 1 to both numerator and denominator from
490
- Chin-Yew Lin and Franz Josef Och (2004) Automatic evaluation of
491
- machine translation quality using longest common subsequence and
492
- skip-bigram statistics. In ACL04.
493
- """
494
- return [
495
- Fraction(p_i.numerator + 1, p_i.denominator + 1, _normalize=False)
496
- for p_i in p_n
497
- ]
498
-
499
- def method3(self, p_n, *args, **kwargs):
500
- """
501
- Smoothing method 3: NIST geometric sequence smoothing
502
- The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
503
- precision score whose matching n-gram count is null.
504
- k is 1 for the first 'n' value for which the n-gram match count is null/
505
- For example, if the text contains:
506
- - one 2-gram match
507
- - and (consequently) two 1-gram matches
508
- the n-gram count for each individual precision score would be:
509
- - n=1 => prec_count = 2 (two unigrams)
510
- - n=2 => prec_count = 1 (one bigram)
511
- - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1)
512
- - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
513
- """
514
- incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
515
- for i, p_i in enumerate(p_n):
516
- if p_i.numerator == 0:
517
- p_n[i] = 1 / (2 ** incvnt * p_i.denominator)
518
- incvnt += 1
519
- return p_n
520
-
521
- def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
522
- """
523
- Smoothing method 4:
524
- Shorter translations may have inflated precision values due to having
525
- smaller denominators; therefore, we give them proportionally
526
- smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
527
- suggests dividing by 1/ln(len(T)), where T is the length of the translation.
528
- """
529
- hyp_len = hyp_len if hyp_len else len(hypothesis)
530
- for i, p_i in enumerate(p_n):
531
- if p_i.numerator == 0 and hyp_len != 0:
532
- incvnt = i + 1 * self.k / math.log(
533
- hyp_len
534
- ) # Note that this K is different from the K from NIST.
535
- p_n[i] = incvnt / p_i.denominator
536
- return p_n
537
-
538
- def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
539
- """
540
- Smoothing method 5:
541
- The matched counts for similar values of n should be similar. To a
542
- calculate the n-gram matched count, it averages the n−1, n and n+1 gram
543
- matched counts.
544
- """
545
- hyp_len = hyp_len if hyp_len else len(hypothesis)
546
- m = {}
547
- # Requires an precision value for an addition ngram order.
548
- p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
549
- m[-1] = p_n[0] + 1
550
- for i, p_i in enumerate(p_n):
551
- p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
552
- m[i] = p_n[i]
553
- return p_n
554
-
555
- def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
556
- """
557
- Smoothing method 6:
558
- Interpolates the maximum likelihood estimate of the precision *p_n* with
559
- a prior estimate *pi0*. The prior is estimated by assuming that the ratio
560
- between pn and pn−1 will be the same as that between pn−1 and pn−2; from
561
- Gao and He (2013) Training MRF-Based Phrase Translation Models using
562
- Gradient Ascent. In NAACL.
563
- """
564
- hyp_len = hyp_len if hyp_len else len(hypothesis)
565
- # This smoothing only works when p_1 and p_2 is non-zero.
566
- # Raise an error with an appropriate message when the input is too short
567
- # to use this smoothing technique.
568
- assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
569
- for i, p_i in enumerate(p_n):
570
- if i in [0, 1]: # Skips the first 2 orders of ngrams.
571
- continue
572
- else:
573
- pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
574
- # No. of ngrams in translation that matches the reference.
575
- m = p_i.numerator
576
- # No. of ngrams in translation.
577
- l = sum(1 for _ in ngrams(hypothesis, i + 1))
578
- # Calculates the interpolated precision.
579
- p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
580
- return p_n
581
-
582
- def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
583
- """
584
- Smoothing method 7:
585
- Interpolates methods 4 and 5.
586
- """
587
- hyp_len = hyp_len if hyp_len else len(hypothesis)
588
- p_n = self.method4(p_n, references, hypothesis, hyp_len)
589
- p_n = self.method5(p_n, references, hypothesis, hyp_len)
590
- return p_n
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval/code_bleu.py DELETED
@@ -1,44 +0,0 @@
1
- import bleu
2
- import weighted_ngram_match
3
- import syntax_match
4
- import dataflow_match
5
-
6
-
7
- def calc(predictions, references):
8
- lang = "python"
9
-
10
- alpha, beta, gamma, theta = (0.1, 0.1, 0.4, 0.4)
11
-
12
- tokenized_pres = [x.split() for x in predictions]
13
- tokenized_refs = [[x.split() for x in reference] for reference in references]
14
-
15
- ngram_match_score = bleu.corpus_bleu(tokenized_refs, tokenized_pres)
16
- keywords = [x.strip() for x in open('./src/eval/keywords/python.txt', 'r', encoding='utf-8').readlines()]
17
-
18
- def make_weights(reference_tokens, key_word_list):
19
- return {token: 1 if token in key_word_list else 0.2 for token in reference_tokens}
20
-
21
- tokenized_refs_with_weights = [[[reference_tokens, make_weights(reference_tokens, keywords)] \
22
- for reference_tokens in reference] for reference in tokenized_refs]
23
-
24
- weighted_ngram_match_score = weighted_ngram_match.corpus_bleu(tokenized_refs_with_weights, tokenized_pres)
25
-
26
- # calculate syntax match
27
- syntax_match_score = syntax_match.corpus_syntax_match(references, predictions, lang)
28
-
29
- # calculate dataflow match
30
- dataflow_match_score = dataflow_match.corpus_dataflow_match(references, predictions, lang)
31
-
32
- code_bleu_score = alpha * ngram_match_score \
33
- + beta * weighted_ngram_match_score \
34
- + gamma * syntax_match_score \
35
- + theta * dataflow_match_score
36
-
37
- return {
38
- 'ngram_match_score': ngram_match_score,
39
- 'weighted_ngram_match_score': weighted_ngram_match_score,
40
- 'syntax_match_score': syntax_match_score,
41
- 'dataflow_match_score': dataflow_match_score,
42
- 'code_bleu_score': code_bleu_score
43
- }
44
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval/dataflow_match.py DELETED
@@ -1,148 +0,0 @@
1
- # Copyright (c) Microsoft Corporation.
2
- # Licensed under the MIT license.
3
-
4
- from parser import DFG_python, DFG_java, DFG_ruby, DFG_go, DFG_php, DFG_javascript, DFG_csharp
5
- from parser import (remove_comments_and_docstrings,
6
- tree_to_token_index,
7
- index_to_code_token,
8
- tree_to_variable_index)
9
- from tree_sitter import Language, Parser
10
- import pdb
11
-
12
- dfg_function = {
13
- 'python': DFG_python,
14
- 'java': DFG_java,
15
- 'ruby': DFG_ruby,
16
- 'go': DFG_go,
17
- 'php': DFG_php,
18
- 'javascript': DFG_javascript,
19
- 'c_sharp': DFG_csharp,
20
- }
21
-
22
-
23
- def calc_dataflow_match(references, candidate, lang):
24
- return corpus_dataflow_match([references], [candidate], lang)
25
-
26
-
27
- def corpus_dataflow_match(references, candidates, lang):
28
- LANGUAGE = Language('./src/eval/parser/my-languages.so', lang)
29
- parser = Parser()
30
- parser.set_language(LANGUAGE)
31
- parser = [parser, dfg_function[lang]]
32
- match_count = 0
33
- total_count = 0
34
-
35
- for i in range(len(candidates)):
36
- references_sample = references[i]
37
- candidate = candidates[i]
38
- for reference in references_sample:
39
- try:
40
- candidate = remove_comments_and_docstrings(candidate, 'java')
41
- except:
42
- pass
43
- try:
44
- reference = remove_comments_and_docstrings(reference, 'java')
45
- except:
46
- pass
47
-
48
- cand_dfg = get_data_flow(candidate, parser)
49
- ref_dfg = get_data_flow(reference, parser)
50
-
51
- normalized_cand_dfg = normalize_dataflow(cand_dfg)
52
- normalized_ref_dfg = normalize_dataflow(ref_dfg)
53
-
54
- if len(normalized_ref_dfg) > 0:
55
- total_count += len(normalized_ref_dfg)
56
- for dataflow in normalized_ref_dfg:
57
- if dataflow in normalized_cand_dfg:
58
- match_count += 1
59
- normalized_cand_dfg.remove(dataflow)
60
- if total_count == 0:
61
- print(
62
- "WARNING: There is no reference data-flows extracted from the whole corpus, and the data-flow match score degenerates to 0. Please consider ignoring this score.")
63
- return 0
64
- score = match_count / total_count
65
- return score
66
-
67
-
68
- def get_data_flow(code, parser):
69
- try:
70
- tree = parser[0].parse(bytes(code, 'utf8'))
71
- root_node = tree.root_node
72
- tokens_index = tree_to_token_index(root_node)
73
- code = code.split('\n')
74
- code_tokens = [index_to_code_token(x, code) for x in tokens_index]
75
- index_to_code = {}
76
- for idx, (index, code) in enumerate(zip(tokens_index, code_tokens)):
77
- index_to_code[index] = (idx, code)
78
- try:
79
- DFG, _ = parser[1](root_node, index_to_code, {})
80
- except:
81
- DFG = []
82
- DFG = sorted(DFG, key=lambda x: x[1])
83
- indexs = set()
84
- for d in DFG:
85
- if len(d[-1]) != 0:
86
- indexs.add(d[1])
87
- for x in d[-1]:
88
- indexs.add(x)
89
- new_DFG = []
90
- for d in DFG:
91
- if d[1] in indexs:
92
- new_DFG.append(d)
93
- codes = code_tokens
94
- dfg = new_DFG
95
- except:
96
- codes = code.split()
97
- dfg = []
98
- # merge nodes
99
- dic = {}
100
- for d in dfg:
101
- if d[1] not in dic:
102
- dic[d[1]] = d
103
- else:
104
- dic[d[1]] = (d[0], d[1], d[2], list(set(dic[d[1]][3] + d[3])), list(set(dic[d[1]][4] + d[4])))
105
- DFG = []
106
- for d in dic:
107
- DFG.append(dic[d])
108
- dfg = DFG
109
- return dfg
110
-
111
-
112
- def normalize_dataflow_item(dataflow_item):
113
- var_name = dataflow_item[0]
114
- var_pos = dataflow_item[1]
115
- relationship = dataflow_item[2]
116
- par_vars_name_list = dataflow_item[3]
117
- par_vars_pos_list = dataflow_item[4]
118
-
119
- var_names = list(set(par_vars_name_list + [var_name]))
120
- norm_names = {}
121
- for i in range(len(var_names)):
122
- norm_names[var_names[i]] = 'var_' + str(i)
123
-
124
- norm_var_name = norm_names[var_name]
125
- relationship = dataflow_item[2]
126
- norm_par_vars_name_list = [norm_names[x] for x in par_vars_name_list]
127
-
128
- return (norm_var_name, relationship, norm_par_vars_name_list)
129
-
130
-
131
- def normalize_dataflow(dataflow):
132
- var_dict = {}
133
- i = 0
134
- normalized_dataflow = []
135
- for item in dataflow:
136
- var_name = item[0]
137
- relationship = item[2]
138
- par_vars_name_list = item[3]
139
- for name in par_vars_name_list:
140
- if name not in var_dict:
141
- var_dict[name] = 'var_' + str(i)
142
- i += 1
143
- if var_name not in var_dict:
144
- var_dict[var_name] = 'var_' + str(i)
145
- i += 1
146
- normalized_dataflow.append((var_dict[var_name], relationship, [var_dict[x] for x in par_vars_name_list]))
147
- return normalized_dataflow
148
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval/keywords/python.txt DELETED
@@ -1,35 +0,0 @@
1
- False
2
- None
3
- True
4
- and
5
- as
6
- assert
7
- async
8
- await
9
- break
10
- class
11
- continue
12
- def
13
- del
14
- elif
15
- else
16
- except
17
- finally
18
- for
19
- from
20
- global
21
- if
22
- import
23
- in
24
- is
25
- lambda
26
- nonlocal
27
- not
28
- or
29
- pass
30
- raise
31
- return
32
- try
33
- while
34
- with
35
- yield
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval/parser/DFG.py DELETED
@@ -1,1186 +0,0 @@
1
- # Copyright (c) Microsoft Corporation.
2
- # Licensed under the MIT license.
3
-
4
- from tree_sitter import Language, Parser
5
- from .utils import (remove_comments_and_docstrings,
6
- tree_to_token_index,
7
- index_to_code_token,
8
- tree_to_variable_index)
9
-
10
-
11
- def DFG_python(root_node, index_to_code, states):
12
- assignment = ['assignment', 'augmented_assignment', 'for_in_clause']
13
- if_statement = ['if_statement']
14
- for_statement = ['for_statement']
15
- while_statement = ['while_statement']
16
- do_first_statement = ['for_in_clause']
17
- def_statement = ['default_parameter']
18
- states = states.copy()
19
- if (len(root_node.children) == 0 or root_node.type in ['string_literal', 'string',
20
- 'character_literal']) and root_node.type != 'comment':
21
- idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
22
- if root_node.type == code:
23
- return [], states
24
- elif code in states:
25
- return [(code, idx, 'comesFrom', [code], states[code].copy())], states
26
- else:
27
- if root_node.type == 'identifier':
28
- states[code] = [idx]
29
- return [(code, idx, 'comesFrom', [], [])], states
30
- elif root_node.type in def_statement:
31
- name = root_node.child_by_field_name('name')
32
- value = root_node.child_by_field_name('value')
33
- DFG = []
34
- if value is None:
35
- indexs = tree_to_variable_index(name, index_to_code)
36
- for index in indexs:
37
- idx, code = index_to_code[index]
38
- DFG.append((code, idx, 'comesFrom', [], []))
39
- states[code] = [idx]
40
- return sorted(DFG, key=lambda x: x[1]), states
41
- else:
42
- name_indexs = tree_to_variable_index(name, index_to_code)
43
- value_indexs = tree_to_variable_index(value, index_to_code)
44
- temp, states = DFG_python(value, index_to_code, states)
45
- DFG += temp
46
- for index1 in name_indexs:
47
- idx1, code1 = index_to_code[index1]
48
- for index2 in value_indexs:
49
- idx2, code2 = index_to_code[index2]
50
- DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
51
- states[code1] = [idx1]
52
- return sorted(DFG, key=lambda x: x[1]), states
53
- elif root_node.type in assignment:
54
- if root_node.type == 'for_in_clause':
55
- right_nodes = [root_node.children[-1]]
56
- left_nodes = [root_node.child_by_field_name('left')]
57
- else:
58
- if root_node.child_by_field_name('right') is None:
59
- return [], states
60
- left_nodes = [x for x in root_node.child_by_field_name('left').children if x.type != ',']
61
- right_nodes = [x for x in root_node.child_by_field_name('right').children if x.type != ',']
62
- if len(right_nodes) != len(left_nodes):
63
- left_nodes = [root_node.child_by_field_name('left')]
64
- right_nodes = [root_node.child_by_field_name('right')]
65
- if len(left_nodes) == 0:
66
- left_nodes = [root_node.child_by_field_name('left')]
67
- if len(right_nodes) == 0:
68
- right_nodes = [root_node.child_by_field_name('right')]
69
- DFG = []
70
- for node in right_nodes:
71
- temp, states = DFG_python(node, index_to_code, states)
72
- DFG += temp
73
-
74
- for left_node, right_node in zip(left_nodes, right_nodes):
75
- left_tokens_index = tree_to_variable_index(left_node, index_to_code)
76
- right_tokens_index = tree_to_variable_index(right_node, index_to_code)
77
- temp = []
78
- for token1_index in left_tokens_index:
79
- idx1, code1 = index_to_code[token1_index]
80
- temp.append((code1, idx1, 'computedFrom', [index_to_code[x][1] for x in right_tokens_index],
81
- [index_to_code[x][0] for x in right_tokens_index]))
82
- states[code1] = [idx1]
83
- DFG += temp
84
- return sorted(DFG, key=lambda x: x[1]), states
85
- elif root_node.type in if_statement:
86
- DFG = []
87
- current_states = states.copy()
88
- others_states = []
89
- tag = False
90
- if 'else' in root_node.type:
91
- tag = True
92
- for child in root_node.children:
93
- if 'else' in child.type:
94
- tag = True
95
- if child.type not in ['elif_clause', 'else_clause']:
96
- temp, current_states = DFG_python(child, index_to_code, current_states)
97
- DFG += temp
98
- else:
99
- temp, new_states = DFG_python(child, index_to_code, states)
100
- DFG += temp
101
- others_states.append(new_states)
102
- others_states.append(current_states)
103
- if tag is False:
104
- others_states.append(states)
105
- new_states = {}
106
- for dic in others_states:
107
- for key in dic:
108
- if key not in new_states:
109
- new_states[key] = dic[key].copy()
110
- else:
111
- new_states[key] += dic[key]
112
- for key in new_states:
113
- new_states[key] = sorted(list(set(new_states[key])))
114
- return sorted(DFG, key=lambda x: x[1]), new_states
115
- elif root_node.type in for_statement:
116
- DFG = []
117
- for i in range(2):
118
- right_nodes = [x for x in root_node.child_by_field_name('right').children if x.type != ',']
119
- left_nodes = [x for x in root_node.child_by_field_name('left').children if x.type != ',']
120
- if len(right_nodes) != len(left_nodes):
121
- left_nodes = [root_node.child_by_field_name('left')]
122
- right_nodes = [root_node.child_by_field_name('right')]
123
- if len(left_nodes) == 0:
124
- left_nodes = [root_node.child_by_field_name('left')]
125
- if len(right_nodes) == 0:
126
- right_nodes = [root_node.child_by_field_name('right')]
127
- for node in right_nodes:
128
- temp, states = DFG_python(node, index_to_code, states)
129
- DFG += temp
130
- for left_node, right_node in zip(left_nodes, right_nodes):
131
- left_tokens_index = tree_to_variable_index(left_node, index_to_code)
132
- right_tokens_index = tree_to_variable_index(right_node, index_to_code)
133
- temp = []
134
- for token1_index in left_tokens_index:
135
- idx1, code1 = index_to_code[token1_index]
136
- temp.append((code1, idx1, 'computedFrom', [index_to_code[x][1] for x in right_tokens_index],
137
- [index_to_code[x][0] for x in right_tokens_index]))
138
- states[code1] = [idx1]
139
- DFG += temp
140
- if root_node.children[-1].type == "block":
141
- temp, states = DFG_python(root_node.children[-1], index_to_code, states)
142
- DFG += temp
143
- dic = {}
144
- for x in DFG:
145
- if (x[0], x[1], x[2]) not in dic:
146
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
147
- else:
148
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
149
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
150
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
151
- return sorted(DFG, key=lambda x: x[1]), states
152
- elif root_node.type in while_statement:
153
- DFG = []
154
- for i in range(2):
155
- for child in root_node.children:
156
- temp, states = DFG_python(child, index_to_code, states)
157
- DFG += temp
158
- dic = {}
159
- for x in DFG:
160
- if (x[0], x[1], x[2]) not in dic:
161
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
162
- else:
163
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
164
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
165
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
166
- return sorted(DFG, key=lambda x: x[1]), states
167
- else:
168
- DFG = []
169
- for child in root_node.children:
170
- if child.type in do_first_statement:
171
- temp, states = DFG_python(child, index_to_code, states)
172
- DFG += temp
173
- for child in root_node.children:
174
- if child.type not in do_first_statement:
175
- temp, states = DFG_python(child, index_to_code, states)
176
- DFG += temp
177
-
178
- return sorted(DFG, key=lambda x: x[1]), states
179
-
180
-
181
- def DFG_java(root_node, index_to_code, states):
182
- assignment = ['assignment_expression']
183
- def_statement = ['variable_declarator']
184
- increment_statement = ['update_expression']
185
- if_statement = ['if_statement', 'else']
186
- for_statement = ['for_statement']
187
- enhanced_for_statement = ['enhanced_for_statement']
188
- while_statement = ['while_statement']
189
- do_first_statement = []
190
- states = states.copy()
191
- if (len(root_node.children) == 0 or root_node.type in ['string_literal', 'string',
192
- 'character_literal']) and root_node.type != 'comment':
193
- idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
194
- if root_node.type == code:
195
- return [], states
196
- elif code in states:
197
- return [(code, idx, 'comesFrom', [code], states[code].copy())], states
198
- else:
199
- if root_node.type == 'identifier':
200
- states[code] = [idx]
201
- return [(code, idx, 'comesFrom', [], [])], states
202
- elif root_node.type in def_statement:
203
- name = root_node.child_by_field_name('name')
204
- value = root_node.child_by_field_name('value')
205
- DFG = []
206
- if value is None:
207
- indexs = tree_to_variable_index(name, index_to_code)
208
- for index in indexs:
209
- idx, code = index_to_code[index]
210
- DFG.append((code, idx, 'comesFrom', [], []))
211
- states[code] = [idx]
212
- return sorted(DFG, key=lambda x: x[1]), states
213
- else:
214
- name_indexs = tree_to_variable_index(name, index_to_code)
215
- value_indexs = tree_to_variable_index(value, index_to_code)
216
- temp, states = DFG_java(value, index_to_code, states)
217
- DFG += temp
218
- for index1 in name_indexs:
219
- idx1, code1 = index_to_code[index1]
220
- for index2 in value_indexs:
221
- idx2, code2 = index_to_code[index2]
222
- DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
223
- states[code1] = [idx1]
224
- return sorted(DFG, key=lambda x: x[1]), states
225
- elif root_node.type in assignment:
226
- left_nodes = root_node.child_by_field_name('left')
227
- right_nodes = root_node.child_by_field_name('right')
228
- DFG = []
229
- temp, states = DFG_java(right_nodes, index_to_code, states)
230
- DFG += temp
231
- name_indexs = tree_to_variable_index(left_nodes, index_to_code)
232
- value_indexs = tree_to_variable_index(right_nodes, index_to_code)
233
- for index1 in name_indexs:
234
- idx1, code1 = index_to_code[index1]
235
- for index2 in value_indexs:
236
- idx2, code2 = index_to_code[index2]
237
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
238
- states[code1] = [idx1]
239
- return sorted(DFG, key=lambda x: x[1]), states
240
- elif root_node.type in increment_statement:
241
- DFG = []
242
- indexs = tree_to_variable_index(root_node, index_to_code)
243
- for index1 in indexs:
244
- idx1, code1 = index_to_code[index1]
245
- for index2 in indexs:
246
- idx2, code2 = index_to_code[index2]
247
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
248
- states[code1] = [idx1]
249
- return sorted(DFG, key=lambda x: x[1]), states
250
- elif root_node.type in if_statement:
251
- DFG = []
252
- current_states = states.copy()
253
- others_states = []
254
- flag = False
255
- tag = False
256
- if 'else' in root_node.type:
257
- tag = True
258
- for child in root_node.children:
259
- if 'else' in child.type:
260
- tag = True
261
- if child.type not in if_statement and flag is False:
262
- temp, current_states = DFG_java(child, index_to_code, current_states)
263
- DFG += temp
264
- else:
265
- flag = True
266
- temp, new_states = DFG_java(child, index_to_code, states)
267
- DFG += temp
268
- others_states.append(new_states)
269
- others_states.append(current_states)
270
- if tag is False:
271
- others_states.append(states)
272
- new_states = {}
273
- for dic in others_states:
274
- for key in dic:
275
- if key not in new_states:
276
- new_states[key] = dic[key].copy()
277
- else:
278
- new_states[key] += dic[key]
279
- for key in new_states:
280
- new_states[key] = sorted(list(set(new_states[key])))
281
- return sorted(DFG, key=lambda x: x[1]), new_states
282
- elif root_node.type in for_statement:
283
- DFG = []
284
- for child in root_node.children:
285
- temp, states = DFG_java(child, index_to_code, states)
286
- DFG += temp
287
- flag = False
288
- for child in root_node.children:
289
- if flag:
290
- temp, states = DFG_java(child, index_to_code, states)
291
- DFG += temp
292
- elif child.type == "local_variable_declaration":
293
- flag = True
294
- dic = {}
295
- for x in DFG:
296
- if (x[0], x[1], x[2]) not in dic:
297
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
298
- else:
299
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
300
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
301
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
302
- return sorted(DFG, key=lambda x: x[1]), states
303
- elif root_node.type in enhanced_for_statement:
304
- name = root_node.child_by_field_name('name')
305
- value = root_node.child_by_field_name('value')
306
- body = root_node.child_by_field_name('body')
307
- DFG = []
308
- for i in range(2):
309
- temp, states = DFG_java(value, index_to_code, states)
310
- DFG += temp
311
- name_indexs = tree_to_variable_index(name, index_to_code)
312
- value_indexs = tree_to_variable_index(value, index_to_code)
313
- for index1 in name_indexs:
314
- idx1, code1 = index_to_code[index1]
315
- for index2 in value_indexs:
316
- idx2, code2 = index_to_code[index2]
317
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
318
- states[code1] = [idx1]
319
- temp, states = DFG_java(body, index_to_code, states)
320
- DFG += temp
321
- dic = {}
322
- for x in DFG:
323
- if (x[0], x[1], x[2]) not in dic:
324
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
325
- else:
326
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
327
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
328
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
329
- return sorted(DFG, key=lambda x: x[1]), states
330
- elif root_node.type in while_statement:
331
- DFG = []
332
- for i in range(2):
333
- for child in root_node.children:
334
- temp, states = DFG_java(child, index_to_code, states)
335
- DFG += temp
336
- dic = {}
337
- for x in DFG:
338
- if (x[0], x[1], x[2]) not in dic:
339
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
340
- else:
341
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
342
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
343
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
344
- return sorted(DFG, key=lambda x: x[1]), states
345
- else:
346
- DFG = []
347
- for child in root_node.children:
348
- if child.type in do_first_statement:
349
- temp, states = DFG_java(child, index_to_code, states)
350
- DFG += temp
351
- for child in root_node.children:
352
- if child.type not in do_first_statement:
353
- temp, states = DFG_java(child, index_to_code, states)
354
- DFG += temp
355
-
356
- return sorted(DFG, key=lambda x: x[1]), states
357
-
358
-
359
- def DFG_csharp(root_node, index_to_code, states):
360
- assignment = ['assignment_expression']
361
- def_statement = ['variable_declarator']
362
- increment_statement = ['postfix_unary_expression']
363
- if_statement = ['if_statement', 'else']
364
- for_statement = ['for_statement']
365
- enhanced_for_statement = ['for_each_statement']
366
- while_statement = ['while_statement']
367
- do_first_statement = []
368
- states = states.copy()
369
- if (len(root_node.children) == 0 or root_node.type in ['string_literal', 'string',
370
- 'character_literal']) and root_node.type != 'comment':
371
- idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
372
- if root_node.type == code:
373
- return [], states
374
- elif code in states:
375
- return [(code, idx, 'comesFrom', [code], states[code].copy())], states
376
- else:
377
- if root_node.type == 'identifier':
378
- states[code] = [idx]
379
- return [(code, idx, 'comesFrom', [], [])], states
380
- elif root_node.type in def_statement:
381
- if len(root_node.children) == 2:
382
- name = root_node.children[0]
383
- value = root_node.children[1]
384
- else:
385
- name = root_node.children[0]
386
- value = None
387
- DFG = []
388
- if value is None:
389
- indexs = tree_to_variable_index(name, index_to_code)
390
- for index in indexs:
391
- idx, code = index_to_code[index]
392
- DFG.append((code, idx, 'comesFrom', [], []))
393
- states[code] = [idx]
394
- return sorted(DFG, key=lambda x: x[1]), states
395
- else:
396
- name_indexs = tree_to_variable_index(name, index_to_code)
397
- value_indexs = tree_to_variable_index(value, index_to_code)
398
- temp, states = DFG_csharp(value, index_to_code, states)
399
- DFG += temp
400
- for index1 in name_indexs:
401
- idx1, code1 = index_to_code[index1]
402
- for index2 in value_indexs:
403
- idx2, code2 = index_to_code[index2]
404
- DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
405
- states[code1] = [idx1]
406
- return sorted(DFG, key=lambda x: x[1]), states
407
- elif root_node.type in assignment:
408
- left_nodes = root_node.child_by_field_name('left')
409
- right_nodes = root_node.child_by_field_name('right')
410
- DFG = []
411
- temp, states = DFG_csharp(right_nodes, index_to_code, states)
412
- DFG += temp
413
- name_indexs = tree_to_variable_index(left_nodes, index_to_code)
414
- value_indexs = tree_to_variable_index(right_nodes, index_to_code)
415
- for index1 in name_indexs:
416
- idx1, code1 = index_to_code[index1]
417
- for index2 in value_indexs:
418
- idx2, code2 = index_to_code[index2]
419
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
420
- states[code1] = [idx1]
421
- return sorted(DFG, key=lambda x: x[1]), states
422
- elif root_node.type in increment_statement:
423
- DFG = []
424
- indexs = tree_to_variable_index(root_node, index_to_code)
425
- for index1 in indexs:
426
- idx1, code1 = index_to_code[index1]
427
- for index2 in indexs:
428
- idx2, code2 = index_to_code[index2]
429
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
430
- states[code1] = [idx1]
431
- return sorted(DFG, key=lambda x: x[1]), states
432
- elif root_node.type in if_statement:
433
- DFG = []
434
- current_states = states.copy()
435
- others_states = []
436
- flag = False
437
- tag = False
438
- if 'else' in root_node.type:
439
- tag = True
440
- for child in root_node.children:
441
- if 'else' in child.type:
442
- tag = True
443
- if child.type not in if_statement and flag is False:
444
- temp, current_states = DFG_csharp(child, index_to_code, current_states)
445
- DFG += temp
446
- else:
447
- flag = True
448
- temp, new_states = DFG_csharp(child, index_to_code, states)
449
- DFG += temp
450
- others_states.append(new_states)
451
- others_states.append(current_states)
452
- if tag is False:
453
- others_states.append(states)
454
- new_states = {}
455
- for dic in others_states:
456
- for key in dic:
457
- if key not in new_states:
458
- new_states[key] = dic[key].copy()
459
- else:
460
- new_states[key] += dic[key]
461
- for key in new_states:
462
- new_states[key] = sorted(list(set(new_states[key])))
463
- return sorted(DFG, key=lambda x: x[1]), new_states
464
- elif root_node.type in for_statement:
465
- DFG = []
466
- for child in root_node.children:
467
- temp, states = DFG_csharp(child, index_to_code, states)
468
- DFG += temp
469
- flag = False
470
- for child in root_node.children:
471
- if flag:
472
- temp, states = DFG_csharp(child, index_to_code, states)
473
- DFG += temp
474
- elif child.type == "local_variable_declaration":
475
- flag = True
476
- dic = {}
477
- for x in DFG:
478
- if (x[0], x[1], x[2]) not in dic:
479
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
480
- else:
481
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
482
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
483
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
484
- return sorted(DFG, key=lambda x: x[1]), states
485
- elif root_node.type in enhanced_for_statement:
486
- name = root_node.child_by_field_name('left')
487
- value = root_node.child_by_field_name('right')
488
- body = root_node.child_by_field_name('body')
489
- DFG = []
490
- for i in range(2):
491
- temp, states = DFG_csharp(value, index_to_code, states)
492
- DFG += temp
493
- name_indexs = tree_to_variable_index(name, index_to_code)
494
- value_indexs = tree_to_variable_index(value, index_to_code)
495
- for index1 in name_indexs:
496
- idx1, code1 = index_to_code[index1]
497
- for index2 in value_indexs:
498
- idx2, code2 = index_to_code[index2]
499
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
500
- states[code1] = [idx1]
501
- temp, states = DFG_csharp(body, index_to_code, states)
502
- DFG += temp
503
- dic = {}
504
- for x in DFG:
505
- if (x[0], x[1], x[2]) not in dic:
506
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
507
- else:
508
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
509
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
510
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
511
- return sorted(DFG, key=lambda x: x[1]), states
512
- elif root_node.type in while_statement:
513
- DFG = []
514
- for i in range(2):
515
- for child in root_node.children:
516
- temp, states = DFG_csharp(child, index_to_code, states)
517
- DFG += temp
518
- dic = {}
519
- for x in DFG:
520
- if (x[0], x[1], x[2]) not in dic:
521
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
522
- else:
523
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
524
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
525
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
526
- return sorted(DFG, key=lambda x: x[1]), states
527
- else:
528
- DFG = []
529
- for child in root_node.children:
530
- if child.type in do_first_statement:
531
- temp, states = DFG_csharp(child, index_to_code, states)
532
- DFG += temp
533
- for child in root_node.children:
534
- if child.type not in do_first_statement:
535
- temp, states = DFG_csharp(child, index_to_code, states)
536
- DFG += temp
537
-
538
- return sorted(DFG, key=lambda x: x[1]), states
539
-
540
-
541
- def DFG_ruby(root_node, index_to_code, states):
542
- assignment = ['assignment', 'operator_assignment']
543
- if_statement = ['if', 'elsif', 'else', 'unless', 'when']
544
- for_statement = ['for']
545
- while_statement = ['while_modifier', 'until']
546
- do_first_statement = []
547
- def_statement = ['keyword_parameter']
548
- if (len(root_node.children) == 0 or root_node.type in ['string_literal', 'string',
549
- 'character_literal']) and root_node.type != 'comment':
550
- states = states.copy()
551
- idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
552
- if root_node.type == code:
553
- return [], states
554
- elif code in states:
555
- return [(code, idx, 'comesFrom', [code], states[code].copy())], states
556
- else:
557
- if root_node.type == 'identifier':
558
- states[code] = [idx]
559
- return [(code, idx, 'comesFrom', [], [])], states
560
- elif root_node.type in def_statement:
561
- name = root_node.child_by_field_name('name')
562
- value = root_node.child_by_field_name('value')
563
- DFG = []
564
- if value is None:
565
- indexs = tree_to_variable_index(name, index_to_code)
566
- for index in indexs:
567
- idx, code = index_to_code[index]
568
- DFG.append((code, idx, 'comesFrom', [], []))
569
- states[code] = [idx]
570
- return sorted(DFG, key=lambda x: x[1]), states
571
- else:
572
- name_indexs = tree_to_variable_index(name, index_to_code)
573
- value_indexs = tree_to_variable_index(value, index_to_code)
574
- temp, states = DFG_ruby(value, index_to_code, states)
575
- DFG += temp
576
- for index1 in name_indexs:
577
- idx1, code1 = index_to_code[index1]
578
- for index2 in value_indexs:
579
- idx2, code2 = index_to_code[index2]
580
- DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
581
- states[code1] = [idx1]
582
- return sorted(DFG, key=lambda x: x[1]), states
583
- elif root_node.type in assignment:
584
- left_nodes = [x for x in root_node.child_by_field_name('left').children if x.type != ',']
585
- right_nodes = [x for x in root_node.child_by_field_name('right').children if x.type != ',']
586
- if len(right_nodes) != len(left_nodes):
587
- left_nodes = [root_node.child_by_field_name('left')]
588
- right_nodes = [root_node.child_by_field_name('right')]
589
- if len(left_nodes) == 0:
590
- left_nodes = [root_node.child_by_field_name('left')]
591
- if len(right_nodes) == 0:
592
- right_nodes = [root_node.child_by_field_name('right')]
593
- if root_node.type == "operator_assignment":
594
- left_nodes = [root_node.children[0]]
595
- right_nodes = [root_node.children[-1]]
596
-
597
- DFG = []
598
- for node in right_nodes:
599
- temp, states = DFG_ruby(node, index_to_code, states)
600
- DFG += temp
601
-
602
- for left_node, right_node in zip(left_nodes, right_nodes):
603
- left_tokens_index = tree_to_variable_index(left_node, index_to_code)
604
- right_tokens_index = tree_to_variable_index(right_node, index_to_code)
605
- temp = []
606
- for token1_index in left_tokens_index:
607
- idx1, code1 = index_to_code[token1_index]
608
- temp.append((code1, idx1, 'computedFrom', [index_to_code[x][1] for x in right_tokens_index],
609
- [index_to_code[x][0] for x in right_tokens_index]))
610
- states[code1] = [idx1]
611
- DFG += temp
612
- return sorted(DFG, key=lambda x: x[1]), states
613
- elif root_node.type in if_statement:
614
- DFG = []
615
- current_states = states.copy()
616
- others_states = []
617
- tag = False
618
- if 'else' in root_node.type:
619
- tag = True
620
- for child in root_node.children:
621
- if 'else' in child.type:
622
- tag = True
623
- if child.type not in if_statement:
624
- temp, current_states = DFG_ruby(child, index_to_code, current_states)
625
- DFG += temp
626
- else:
627
- temp, new_states = DFG_ruby(child, index_to_code, states)
628
- DFG += temp
629
- others_states.append(new_states)
630
- others_states.append(current_states)
631
- if tag is False:
632
- others_states.append(states)
633
- new_states = {}
634
- for dic in others_states:
635
- for key in dic:
636
- if key not in new_states:
637
- new_states[key] = dic[key].copy()
638
- else:
639
- new_states[key] += dic[key]
640
- for key in new_states:
641
- new_states[key] = sorted(list(set(new_states[key])))
642
- return sorted(DFG, key=lambda x: x[1]), new_states
643
- elif root_node.type in for_statement:
644
- DFG = []
645
- for i in range(2):
646
- left_nodes = [root_node.child_by_field_name('pattern')]
647
- right_nodes = [root_node.child_by_field_name('value')]
648
- assert len(right_nodes) == len(left_nodes)
649
- for node in right_nodes:
650
- temp, states = DFG_ruby(node, index_to_code, states)
651
- DFG += temp
652
- for left_node, right_node in zip(left_nodes, right_nodes):
653
- left_tokens_index = tree_to_variable_index(left_node, index_to_code)
654
- right_tokens_index = tree_to_variable_index(right_node, index_to_code)
655
- temp = []
656
- for token1_index in left_tokens_index:
657
- idx1, code1 = index_to_code[token1_index]
658
- temp.append((code1, idx1, 'computedFrom', [index_to_code[x][1] for x in right_tokens_index],
659
- [index_to_code[x][0] for x in right_tokens_index]))
660
- states[code1] = [idx1]
661
- DFG += temp
662
- temp, states = DFG_ruby(root_node.child_by_field_name('body'), index_to_code, states)
663
- DFG += temp
664
- dic = {}
665
- for x in DFG:
666
- if (x[0], x[1], x[2]) not in dic:
667
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
668
- else:
669
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
670
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
671
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
672
- return sorted(DFG, key=lambda x: x[1]), states
673
- elif root_node.type in while_statement:
674
- DFG = []
675
- for i in range(2):
676
- for child in root_node.children:
677
- temp, states = DFG_ruby(child, index_to_code, states)
678
- DFG += temp
679
- dic = {}
680
- for x in DFG:
681
- if (x[0], x[1], x[2]) not in dic:
682
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
683
- else:
684
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
685
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
686
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
687
- return sorted(DFG, key=lambda x: x[1]), states
688
- else:
689
- DFG = []
690
- for child in root_node.children:
691
- if child.type in do_first_statement:
692
- temp, states = DFG_ruby(child, index_to_code, states)
693
- DFG += temp
694
- for child in root_node.children:
695
- if child.type not in do_first_statement:
696
- temp, states = DFG_ruby(child, index_to_code, states)
697
- DFG += temp
698
-
699
- return sorted(DFG, key=lambda x: x[1]), states
700
-
701
-
702
- def DFG_go(root_node, index_to_code, states):
703
- assignment = ['assignment_statement', ]
704
- def_statement = ['var_spec']
705
- increment_statement = ['inc_statement']
706
- if_statement = ['if_statement', 'else']
707
- for_statement = ['for_statement']
708
- enhanced_for_statement = []
709
- while_statement = []
710
- do_first_statement = []
711
- states = states.copy()
712
- if (len(root_node.children) == 0 or root_node.type in ['string_literal', 'string',
713
- 'character_literal']) and root_node.type != 'comment':
714
- idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
715
- if root_node.type == code:
716
- return [], states
717
- elif code in states:
718
- return [(code, idx, 'comesFrom', [code], states[code].copy())], states
719
- else:
720
- if root_node.type == 'identifier':
721
- states[code] = [idx]
722
- return [(code, idx, 'comesFrom', [], [])], states
723
- elif root_node.type in def_statement:
724
- name = root_node.child_by_field_name('name')
725
- value = root_node.child_by_field_name('value')
726
- DFG = []
727
- if value is None:
728
- indexs = tree_to_variable_index(name, index_to_code)
729
- for index in indexs:
730
- idx, code = index_to_code[index]
731
- DFG.append((code, idx, 'comesFrom', [], []))
732
- states[code] = [idx]
733
- return sorted(DFG, key=lambda x: x[1]), states
734
- else:
735
- name_indexs = tree_to_variable_index(name, index_to_code)
736
- value_indexs = tree_to_variable_index(value, index_to_code)
737
- temp, states = DFG_go(value, index_to_code, states)
738
- DFG += temp
739
- for index1 in name_indexs:
740
- idx1, code1 = index_to_code[index1]
741
- for index2 in value_indexs:
742
- idx2, code2 = index_to_code[index2]
743
- DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
744
- states[code1] = [idx1]
745
- return sorted(DFG, key=lambda x: x[1]), states
746
- elif root_node.type in assignment:
747
- left_nodes = root_node.child_by_field_name('left')
748
- right_nodes = root_node.child_by_field_name('right')
749
- DFG = []
750
- temp, states = DFG_go(right_nodes, index_to_code, states)
751
- DFG += temp
752
- name_indexs = tree_to_variable_index(left_nodes, index_to_code)
753
- value_indexs = tree_to_variable_index(right_nodes, index_to_code)
754
- for index1 in name_indexs:
755
- idx1, code1 = index_to_code[index1]
756
- for index2 in value_indexs:
757
- idx2, code2 = index_to_code[index2]
758
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
759
- states[code1] = [idx1]
760
- return sorted(DFG, key=lambda x: x[1]), states
761
- elif root_node.type in increment_statement:
762
- DFG = []
763
- indexs = tree_to_variable_index(root_node, index_to_code)
764
- for index1 in indexs:
765
- idx1, code1 = index_to_code[index1]
766
- for index2 in indexs:
767
- idx2, code2 = index_to_code[index2]
768
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
769
- states[code1] = [idx1]
770
- return sorted(DFG, key=lambda x: x[1]), states
771
- elif root_node.type in if_statement:
772
- DFG = []
773
- current_states = states.copy()
774
- others_states = []
775
- flag = False
776
- tag = False
777
- if 'else' in root_node.type:
778
- tag = True
779
- for child in root_node.children:
780
- if 'else' in child.type:
781
- tag = True
782
- if child.type not in if_statement and flag is False:
783
- temp, current_states = DFG_go(child, index_to_code, current_states)
784
- DFG += temp
785
- else:
786
- flag = True
787
- temp, new_states = DFG_go(child, index_to_code, states)
788
- DFG += temp
789
- others_states.append(new_states)
790
- others_states.append(current_states)
791
- if tag is False:
792
- others_states.append(states)
793
- new_states = {}
794
- for dic in others_states:
795
- for key in dic:
796
- if key not in new_states:
797
- new_states[key] = dic[key].copy()
798
- else:
799
- new_states[key] += dic[key]
800
- for key in states:
801
- if key not in new_states:
802
- new_states[key] = states[key]
803
- else:
804
- new_states[key] += states[key]
805
- for key in new_states:
806
- new_states[key] = sorted(list(set(new_states[key])))
807
- return sorted(DFG, key=lambda x: x[1]), new_states
808
- elif root_node.type in for_statement:
809
- DFG = []
810
- for child in root_node.children:
811
- temp, states = DFG_go(child, index_to_code, states)
812
- DFG += temp
813
- flag = False
814
- for child in root_node.children:
815
- if flag:
816
- temp, states = DFG_go(child, index_to_code, states)
817
- DFG += temp
818
- elif child.type == "for_clause":
819
- if child.child_by_field_name('update') is not None:
820
- temp, states = DFG_go(child.child_by_field_name('update'), index_to_code, states)
821
- DFG += temp
822
- flag = True
823
- dic = {}
824
- for x in DFG:
825
- if (x[0], x[1], x[2]) not in dic:
826
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
827
- else:
828
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
829
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
830
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
831
- return sorted(DFG, key=lambda x: x[1]), states
832
- else:
833
- DFG = []
834
- for child in root_node.children:
835
- if child.type in do_first_statement:
836
- temp, states = DFG_go(child, index_to_code, states)
837
- DFG += temp
838
- for child in root_node.children:
839
- if child.type not in do_first_statement:
840
- temp, states = DFG_go(child, index_to_code, states)
841
- DFG += temp
842
-
843
- return sorted(DFG, key=lambda x: x[1]), states
844
-
845
-
846
- def DFG_php(root_node, index_to_code, states):
847
- assignment = ['assignment_expression', 'augmented_assignment_expression']
848
- def_statement = ['simple_parameter']
849
- increment_statement = ['update_expression']
850
- if_statement = ['if_statement', 'else_clause']
851
- for_statement = ['for_statement']
852
- enhanced_for_statement = ['foreach_statement']
853
- while_statement = ['while_statement']
854
- do_first_statement = []
855
- states = states.copy()
856
- if (len(root_node.children) == 0 or root_node.type in ['string_literal', 'string',
857
- 'character_literal']) and root_node.type != 'comment':
858
- idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
859
- if root_node.type == code:
860
- return [], states
861
- elif code in states:
862
- return [(code, idx, 'comesFrom', [code], states[code].copy())], states
863
- else:
864
- if root_node.type == 'identifier':
865
- states[code] = [idx]
866
- return [(code, idx, 'comesFrom', [], [])], states
867
- elif root_node.type in def_statement:
868
- name = root_node.child_by_field_name('name')
869
- value = root_node.child_by_field_name('default_value')
870
- DFG = []
871
- if value is None:
872
- indexs = tree_to_variable_index(name, index_to_code)
873
- for index in indexs:
874
- idx, code = index_to_code[index]
875
- DFG.append((code, idx, 'comesFrom', [], []))
876
- states[code] = [idx]
877
- return sorted(DFG, key=lambda x: x[1]), states
878
- else:
879
- name_indexs = tree_to_variable_index(name, index_to_code)
880
- value_indexs = tree_to_variable_index(value, index_to_code)
881
- temp, states = DFG_php(value, index_to_code, states)
882
- DFG += temp
883
- for index1 in name_indexs:
884
- idx1, code1 = index_to_code[index1]
885
- for index2 in value_indexs:
886
- idx2, code2 = index_to_code[index2]
887
- DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
888
- states[code1] = [idx1]
889
- return sorted(DFG, key=lambda x: x[1]), states
890
- elif root_node.type in assignment:
891
- left_nodes = root_node.child_by_field_name('left')
892
- right_nodes = root_node.child_by_field_name('right')
893
- DFG = []
894
- temp, states = DFG_php(right_nodes, index_to_code, states)
895
- DFG += temp
896
- name_indexs = tree_to_variable_index(left_nodes, index_to_code)
897
- value_indexs = tree_to_variable_index(right_nodes, index_to_code)
898
- for index1 in name_indexs:
899
- idx1, code1 = index_to_code[index1]
900
- for index2 in value_indexs:
901
- idx2, code2 = index_to_code[index2]
902
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
903
- states[code1] = [idx1]
904
- return sorted(DFG, key=lambda x: x[1]), states
905
- elif root_node.type in increment_statement:
906
- DFG = []
907
- indexs = tree_to_variable_index(root_node, index_to_code)
908
- for index1 in indexs:
909
- idx1, code1 = index_to_code[index1]
910
- for index2 in indexs:
911
- idx2, code2 = index_to_code[index2]
912
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
913
- states[code1] = [idx1]
914
- return sorted(DFG, key=lambda x: x[1]), states
915
- elif root_node.type in if_statement:
916
- DFG = []
917
- current_states = states.copy()
918
- others_states = []
919
- flag = False
920
- tag = False
921
- if 'else' in root_node.type:
922
- tag = True
923
- for child in root_node.children:
924
- if 'else' in child.type:
925
- tag = True
926
- if child.type not in if_statement and flag is False:
927
- temp, current_states = DFG_php(child, index_to_code, current_states)
928
- DFG += temp
929
- else:
930
- flag = True
931
- temp, new_states = DFG_php(child, index_to_code, states)
932
- DFG += temp
933
- others_states.append(new_states)
934
- others_states.append(current_states)
935
- new_states = {}
936
- for dic in others_states:
937
- for key in dic:
938
- if key not in new_states:
939
- new_states[key] = dic[key].copy()
940
- else:
941
- new_states[key] += dic[key]
942
- for key in states:
943
- if key not in new_states:
944
- new_states[key] = states[key]
945
- else:
946
- new_states[key] += states[key]
947
- for key in new_states:
948
- new_states[key] = sorted(list(set(new_states[key])))
949
- return sorted(DFG, key=lambda x: x[1]), new_states
950
- elif root_node.type in for_statement:
951
- DFG = []
952
- for child in root_node.children:
953
- temp, states = DFG_php(child, index_to_code, states)
954
- DFG += temp
955
- flag = False
956
- for child in root_node.children:
957
- if flag:
958
- temp, states = DFG_php(child, index_to_code, states)
959
- DFG += temp
960
- elif child.type == "assignment_expression":
961
- flag = True
962
- dic = {}
963
- for x in DFG:
964
- if (x[0], x[1], x[2]) not in dic:
965
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
966
- else:
967
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
968
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
969
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
970
- return sorted(DFG, key=lambda x: x[1]), states
971
- elif root_node.type in enhanced_for_statement:
972
- name = None
973
- value = None
974
- for child in root_node.children:
975
- if child.type == 'variable_name' and value is None:
976
- value = child
977
- elif child.type == 'variable_name' and name is None:
978
- name = child
979
- break
980
- body = root_node.child_by_field_name('body')
981
- DFG = []
982
- for i in range(2):
983
- temp, states = DFG_php(value, index_to_code, states)
984
- DFG += temp
985
- name_indexs = tree_to_variable_index(name, index_to_code)
986
- value_indexs = tree_to_variable_index(value, index_to_code)
987
- for index1 in name_indexs:
988
- idx1, code1 = index_to_code[index1]
989
- for index2 in value_indexs:
990
- idx2, code2 = index_to_code[index2]
991
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
992
- states[code1] = [idx1]
993
- temp, states = DFG_php(body, index_to_code, states)
994
- DFG += temp
995
- dic = {}
996
- for x in DFG:
997
- if (x[0], x[1], x[2]) not in dic:
998
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
999
- else:
1000
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
1001
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
1002
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
1003
- return sorted(DFG, key=lambda x: x[1]), states
1004
- elif root_node.type in while_statement:
1005
- DFG = []
1006
- for i in range(2):
1007
- for child in root_node.children:
1008
- temp, states = DFG_php(child, index_to_code, states)
1009
- DFG += temp
1010
- dic = {}
1011
- for x in DFG:
1012
- if (x[0], x[1], x[2]) not in dic:
1013
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
1014
- else:
1015
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
1016
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
1017
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
1018
- return sorted(DFG, key=lambda x: x[1]), states
1019
- else:
1020
- DFG = []
1021
- for child in root_node.children:
1022
- if child.type in do_first_statement:
1023
- temp, states = DFG_php(child, index_to_code, states)
1024
- DFG += temp
1025
- for child in root_node.children:
1026
- if child.type not in do_first_statement:
1027
- temp, states = DFG_php(child, index_to_code, states)
1028
- DFG += temp
1029
-
1030
- return sorted(DFG, key=lambda x: x[1]), states
1031
-
1032
-
1033
- def DFG_javascript(root_node, index_to_code, states):
1034
- assignment = ['assignment_pattern', 'augmented_assignment_expression']
1035
- def_statement = ['variable_declarator']
1036
- increment_statement = ['update_expression']
1037
- if_statement = ['if_statement', 'else']
1038
- for_statement = ['for_statement']
1039
- enhanced_for_statement = []
1040
- while_statement = ['while_statement']
1041
- do_first_statement = []
1042
- states = states.copy()
1043
- if (len(root_node.children) == 0 or root_node.type in ['string_literal', 'string',
1044
- 'character_literal']) and root_node.type != 'comment':
1045
- idx, code = index_to_code[(root_node.start_point, root_node.end_point)]
1046
- if root_node.type == code:
1047
- return [], states
1048
- elif code in states:
1049
- return [(code, idx, 'comesFrom', [code], states[code].copy())], states
1050
- else:
1051
- if root_node.type == 'identifier':
1052
- states[code] = [idx]
1053
- return [(code, idx, 'comesFrom', [], [])], states
1054
- elif root_node.type in def_statement:
1055
- name = root_node.child_by_field_name('name')
1056
- value = root_node.child_by_field_name('value')
1057
- DFG = []
1058
- if value is None:
1059
- indexs = tree_to_variable_index(name, index_to_code)
1060
- for index in indexs:
1061
- idx, code = index_to_code[index]
1062
- DFG.append((code, idx, 'comesFrom', [], []))
1063
- states[code] = [idx]
1064
- return sorted(DFG, key=lambda x: x[1]), states
1065
- else:
1066
- name_indexs = tree_to_variable_index(name, index_to_code)
1067
- value_indexs = tree_to_variable_index(value, index_to_code)
1068
- temp, states = DFG_javascript(value, index_to_code, states)
1069
- DFG += temp
1070
- for index1 in name_indexs:
1071
- idx1, code1 = index_to_code[index1]
1072
- for index2 in value_indexs:
1073
- idx2, code2 = index_to_code[index2]
1074
- DFG.append((code1, idx1, 'comesFrom', [code2], [idx2]))
1075
- states[code1] = [idx1]
1076
- return sorted(DFG, key=lambda x: x[1]), states
1077
- elif root_node.type in assignment:
1078
- left_nodes = root_node.child_by_field_name('left')
1079
- right_nodes = root_node.child_by_field_name('right')
1080
- DFG = []
1081
- temp, states = DFG_javascript(right_nodes, index_to_code, states)
1082
- DFG += temp
1083
- name_indexs = tree_to_variable_index(left_nodes, index_to_code)
1084
- value_indexs = tree_to_variable_index(right_nodes, index_to_code)
1085
- for index1 in name_indexs:
1086
- idx1, code1 = index_to_code[index1]
1087
- for index2 in value_indexs:
1088
- idx2, code2 = index_to_code[index2]
1089
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
1090
- states[code1] = [idx1]
1091
- return sorted(DFG, key=lambda x: x[1]), states
1092
- elif root_node.type in increment_statement:
1093
- DFG = []
1094
- indexs = tree_to_variable_index(root_node, index_to_code)
1095
- for index1 in indexs:
1096
- idx1, code1 = index_to_code[index1]
1097
- for index2 in indexs:
1098
- idx2, code2 = index_to_code[index2]
1099
- DFG.append((code1, idx1, 'computedFrom', [code2], [idx2]))
1100
- states[code1] = [idx1]
1101
- return sorted(DFG, key=lambda x: x[1]), states
1102
- elif root_node.type in if_statement:
1103
- DFG = []
1104
- current_states = states.copy()
1105
- others_states = []
1106
- flag = False
1107
- tag = False
1108
- if 'else' in root_node.type:
1109
- tag = True
1110
- for child in root_node.children:
1111
- if 'else' in child.type:
1112
- tag = True
1113
- if child.type not in if_statement and flag is False:
1114
- temp, current_states = DFG_javascript(child, index_to_code, current_states)
1115
- DFG += temp
1116
- else:
1117
- flag = True
1118
- temp, new_states = DFG_javascript(child, index_to_code, states)
1119
- DFG += temp
1120
- others_states.append(new_states)
1121
- others_states.append(current_states)
1122
- if tag is False:
1123
- others_states.append(states)
1124
- new_states = {}
1125
- for dic in others_states:
1126
- for key in dic:
1127
- if key not in new_states:
1128
- new_states[key] = dic[key].copy()
1129
- else:
1130
- new_states[key] += dic[key]
1131
- for key in states:
1132
- if key not in new_states:
1133
- new_states[key] = states[key]
1134
- else:
1135
- new_states[key] += states[key]
1136
- for key in new_states:
1137
- new_states[key] = sorted(list(set(new_states[key])))
1138
- return sorted(DFG, key=lambda x: x[1]), new_states
1139
- elif root_node.type in for_statement:
1140
- DFG = []
1141
- for child in root_node.children:
1142
- temp, states = DFG_javascript(child, index_to_code, states)
1143
- DFG += temp
1144
- flag = False
1145
- for child in root_node.children:
1146
- if flag:
1147
- temp, states = DFG_javascript(child, index_to_code, states)
1148
- DFG += temp
1149
- elif child.type == "variable_declaration":
1150
- flag = True
1151
- dic = {}
1152
- for x in DFG:
1153
- if (x[0], x[1], x[2]) not in dic:
1154
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
1155
- else:
1156
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
1157
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
1158
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
1159
- return sorted(DFG, key=lambda x: x[1]), states
1160
- elif root_node.type in while_statement:
1161
- DFG = []
1162
- for i in range(2):
1163
- for child in root_node.children:
1164
- temp, states = DFG_javascript(child, index_to_code, states)
1165
- DFG += temp
1166
- dic = {}
1167
- for x in DFG:
1168
- if (x[0], x[1], x[2]) not in dic:
1169
- dic[(x[0], x[1], x[2])] = [x[3], x[4]]
1170
- else:
1171
- dic[(x[0], x[1], x[2])][0] = list(set(dic[(x[0], x[1], x[2])][0] + x[3]))
1172
- dic[(x[0], x[1], x[2])][1] = sorted(list(set(dic[(x[0], x[1], x[2])][1] + x[4])))
1173
- DFG = [(x[0], x[1], x[2], y[0], y[1]) for x, y in sorted(dic.items(), key=lambda t: t[0][1])]
1174
- return sorted(DFG, key=lambda x: x[1]), states
1175
- else:
1176
- DFG = []
1177
- for child in root_node.children:
1178
- if child.type in do_first_statement:
1179
- temp, states = DFG_javascript(child, index_to_code, states)
1180
- DFG += temp
1181
- for child in root_node.children:
1182
- if child.type not in do_first_statement:
1183
- temp, states = DFG_javascript(child, index_to_code, states)
1184
- DFG += temp
1185
-
1186
- return sorted(DFG, key=lambda x: x[1]), states
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval/parser/__init__.py DELETED
@@ -1,8 +0,0 @@
1
- # Copyright (c) Microsoft Corporation.
2
- # Licensed under the MIT license.
3
-
4
- from .utils import (remove_comments_and_docstrings,
5
- tree_to_token_index,
6
- index_to_code_token,
7
- tree_to_variable_index)
8
- from .DFG import DFG_python,DFG_java,DFG_ruby,DFG_go,DFG_php,DFG_javascript,DFG_csharp
 
 
 
 
 
 
 
 
 
eval/parser/build.py DELETED
@@ -1,15 +0,0 @@
1
- # Copyright (c) Microsoft Corporation.
2
- # Licensed under the MIT license.
3
-
4
- from tree_sitter import Language, Parser
5
-
6
- Language.build_library(
7
- # Store the library in the `build` directory
8
- 'my-languages.so',
9
-
10
- # Include one or more languages
11
- [
12
- 'tree-sitter-python'
13
- ]
14
- )
15
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval/parser/build.sh DELETED
@@ -1,2 +0,0 @@
1
- git clone https://github.com/tree-sitter/tree-sitter-python
2
- python build.py
 
 
 
eval/parser/utils.py DELETED
@@ -1,101 +0,0 @@
1
- # Copyright (c) Microsoft Corporation.
2
- # Licensed under the MIT license.
3
-
4
- import re
5
- from io import StringIO
6
- import tokenize
7
- def remove_comments_and_docstrings(source,lang):
8
- if lang in ['python']:
9
- """
10
- Returns 'source' minus comments and docstrings.
11
- """
12
- io_obj = StringIO(source)
13
- out = ""
14
- prev_toktype = tokenize.INDENT
15
- last_lineno = -1
16
- last_col = 0
17
- for tok in tokenize.generate_tokens(io_obj.readline):
18
- token_type = tok[0]
19
- token_string = tok[1]
20
- start_line, start_col = tok[2]
21
- end_line, end_col = tok[3]
22
- ltext = tok[4]
23
- if start_line > last_lineno:
24
- last_col = 0
25
- if start_col > last_col:
26
- out += (" " * (start_col - last_col))
27
- # Remove comments:
28
- if token_type == tokenize.COMMENT:
29
- pass
30
- # This series of conditionals removes docstrings:
31
- elif token_type == tokenize.STRING:
32
- if prev_toktype != tokenize.INDENT:
33
- # This is likely a docstring; double-check we're not inside an operator:
34
- if prev_toktype != tokenize.NEWLINE:
35
- if start_col > 0:
36
- out += token_string
37
- else:
38
- out += token_string
39
- prev_toktype = token_type
40
- last_col = end_col
41
- last_lineno = end_line
42
- temp=[]
43
- for x in out.split('\n'):
44
- if x.strip()!="":
45
- temp.append(x)
46
- return '\n'.join(temp)
47
- elif lang in ['ruby']:
48
- return source
49
- else:
50
- def replacer(match):
51
- s = match.group(0)
52
- if s.startswith('/'):
53
- return " " # note: a space and not an empty string
54
- else:
55
- return s
56
- pattern = re.compile(
57
- r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
58
- re.DOTALL | re.MULTILINE
59
- )
60
- temp=[]
61
- for x in re.sub(pattern, replacer, source).split('\n'):
62
- if x.strip()!="":
63
- temp.append(x)
64
- return '\n'.join(temp)
65
-
66
- def tree_to_token_index(root_node):
67
- if (len(root_node.children)==0 or root_node.type in ['string_literal','string','character_literal']) and root_node.type!='comment':
68
- return [(root_node.start_point,root_node.end_point)]
69
- else:
70
- code_tokens=[]
71
- for child in root_node.children:
72
- code_tokens+=tree_to_token_index(child)
73
- return code_tokens
74
-
75
- def tree_to_variable_index(root_node,index_to_code):
76
- if (len(root_node.children)==0 or root_node.type in ['string_literal','string','character_literal']) and root_node.type!='comment':
77
- index=(root_node.start_point,root_node.end_point)
78
- _,code=index_to_code[index]
79
- if root_node.type!=code:
80
- return [(root_node.start_point,root_node.end_point)]
81
- else:
82
- return []
83
- else:
84
- code_tokens=[]
85
- for child in root_node.children:
86
- code_tokens+=tree_to_variable_index(child,index_to_code)
87
- return code_tokens
88
-
89
- def index_to_code_token(index,code):
90
- start_point=index[0]
91
- end_point=index[1]
92
- if start_point[0]==end_point[0]:
93
- s=code[start_point[0]][start_point[1]:end_point[1]]
94
- else:
95
- s=""
96
- s+=code[start_point[0]][start_point[1]:]
97
- for i in range(start_point[0]+1,end_point[0]):
98
- s+=code[i]
99
- s+=code[end_point[0]][:end_point[1]]
100
- return s
101
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval/syntax_match.py DELETED
@@ -1,76 +0,0 @@
1
- # Copyright (c) Microsoft Corporation.
2
- # Licensed under the MIT license.
3
-
4
- from parser import DFG_python, DFG_java, DFG_ruby, DFG_go, DFG_php, DFG_javascript, DFG_csharp
5
- from parser import (remove_comments_and_docstrings,
6
- tree_to_token_index,
7
- index_to_code_token,
8
- tree_to_variable_index)
9
- from tree_sitter import Language, Parser
10
-
11
- dfg_function = {
12
- 'python': DFG_python,
13
- 'java': DFG_java,
14
- 'ruby': DFG_ruby,
15
- 'go': DFG_go,
16
- 'php': DFG_php,
17
- 'javascript': DFG_javascript,
18
- 'c_sharp': DFG_csharp,
19
- }
20
-
21
-
22
- def calc_syntax_match(references, candidate, lang):
23
- return corpus_syntax_match([references], [candidate], lang)
24
-
25
-
26
- def corpus_syntax_match(references, candidates, lang):
27
- LANGUAGE = Language('./src/eval/parser/my-languages.so', lang)
28
- parser = Parser()
29
- parser.set_language(LANGUAGE)
30
- match_count = 0
31
- total_count = 0
32
-
33
- for i in range(len(candidates)):
34
- references_sample = references[i]
35
- candidate = candidates[i]
36
- for reference in references_sample:
37
- try:
38
- candidate = remove_comments_and_docstrings(candidate, LANGUAGE)
39
- except:
40
- pass
41
- try:
42
- reference = remove_comments_and_docstrings(reference, LANGUAGE)
43
- except:
44
- pass
45
-
46
- candidate_tree = parser.parse(bytes(candidate, 'utf8')).root_node
47
-
48
- reference_tree = parser.parse(bytes(reference, 'utf8')).root_node
49
-
50
- def get_all_sub_trees(root_node):
51
- node_stack = []
52
- sub_tree_sexp_list = []
53
- depth = 1
54
- node_stack.append([root_node, depth])
55
- while len(node_stack) != 0:
56
- cur_node, cur_depth = node_stack.pop()
57
- sub_tree_sexp_list.append([cur_node.sexp(), cur_depth])
58
- for child_node in cur_node.children:
59
- if len(child_node.children) != 0:
60
- depth = cur_depth + 1
61
- node_stack.append([child_node, depth])
62
- return sub_tree_sexp_list
63
-
64
- cand_sexps = [x[0] for x in get_all_sub_trees(candidate_tree)]
65
- ref_sexps = get_all_sub_trees(reference_tree)
66
-
67
- # print(cand_sexps)
68
- # print(ref_sexps)
69
-
70
- for sub_tree, depth in ref_sexps:
71
- if sub_tree in cand_sexps:
72
- match_count += 1
73
- total_count += len(ref_sexps)
74
-
75
- score = match_count / total_count
76
- return score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval/utils.py DELETED
@@ -1,106 +0,0 @@
1
- # Natural Language Toolkit: Utility functions
2
- #
3
- # Copyright (C) 2001-2020 NLTK Project
4
- # Author: Steven Bird <stevenbird1@gmail.com>
5
- # URL: <http://nltk.org/>
6
- # For license information, see LICENSE.TXT
7
-
8
- from itertools import chain
9
-
10
- def pad_sequence(
11
- sequence,
12
- n,
13
- pad_left=False,
14
- pad_right=False,
15
- left_pad_symbol=None,
16
- right_pad_symbol=None,
17
- ):
18
- """
19
- Returns a padded sequence of items before ngram extraction.
20
- >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
21
- ['<s>', 1, 2, 3, 4, 5, '</s>']
22
- >>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
23
- ['<s>', 1, 2, 3, 4, 5]
24
- >>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
25
- [1, 2, 3, 4, 5, '</s>']
26
- :param sequence: the source data to be padded
27
- :type sequence: sequence or iter
28
- :param n: the degree of the ngrams
29
- :type n: int
30
- :param pad_left: whether the ngrams should be left-padded
31
- :type pad_left: bool
32
- :param pad_right: whether the ngrams should be right-padded
33
- :type pad_right: bool
34
- :param left_pad_symbol: the symbol to use for left padding (default is None)
35
- :type left_pad_symbol: any
36
- :param right_pad_symbol: the symbol to use for right padding (default is None)
37
- :type right_pad_symbol: any
38
- :rtype: sequence or iter
39
- """
40
- sequence = iter(sequence)
41
- if pad_left:
42
- sequence = chain((left_pad_symbol,) * (n - 1), sequence)
43
- if pad_right:
44
- sequence = chain(sequence, (right_pad_symbol,) * (n - 1))
45
- return sequence
46
-
47
-
48
- # add a flag to pad the sequence so we get peripheral ngrams?
49
-
50
-
51
- def ngrams(
52
- sequence,
53
- n,
54
- pad_left=False,
55
- pad_right=False,
56
- left_pad_symbol=None,
57
- right_pad_symbol=None,
58
- ):
59
- """
60
- Return the ngrams generated from a sequence of items, as an iterator.
61
- For example:
62
- >>> from nltk.util import ngrams
63
- >>> list(ngrams([1,2,3,4,5], 3))
64
- [(1, 2, 3), (2, 3, 4), (3, 4, 5)]
65
- Wrap with list for a list version of this function. Set pad_left
66
- or pad_right to true in order to get additional ngrams:
67
- >>> list(ngrams([1,2,3,4,5], 2, pad_right=True))
68
- [(1, 2), (2, 3), (3, 4), (4, 5), (5, None)]
69
- >>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
70
- [(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
71
- >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
72
- [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5)]
73
- >>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
74
- [('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
75
- :param sequence: the source data to be converted into ngrams
76
- :type sequence: sequence or iter
77
- :param n: the degree of the ngrams
78
- :type n: int
79
- :param pad_left: whether the ngrams should be left-padded
80
- :type pad_left: bool
81
- :param pad_right: whether the ngrams should be right-padded
82
- :type pad_right: bool
83
- :param left_pad_symbol: the symbol to use for left padding (default is None)
84
- :type left_pad_symbol: any
85
- :param right_pad_symbol: the symbol to use for right padding (default is None)
86
- :type right_pad_symbol: any
87
- :rtype: sequence or iter
88
- """
89
- sequence = pad_sequence(
90
- sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol
91
- )
92
-
93
- history = []
94
- while n > 1:
95
- # PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator
96
- try:
97
- next_item = next(sequence)
98
- except StopIteration:
99
- # no more data, terminate the generator
100
- return
101
- history.append(next_item)
102
- n -= 1
103
- for item in sequence:
104
- history.append(item)
105
- yield tuple(history)
106
- del history[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eval/weighted_ngram_match.py DELETED
@@ -1,558 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # Copyright (c) Microsoft Corporation.
3
- # Licensed under the MIT license.
4
-
5
- # Natural Language Toolkit: BLEU Score
6
- #
7
- # Copyright (C) 2001-2020 NLTK Project
8
- # Authors: Chin Yee Lee, Hengfeng Li, Ruxin Hou, Calvin Tanujaya Lim
9
- # Contributors: Björn Mattsson, Dmitrijs Milajevs, Liling Tan
10
- # URL: <http://nltk.org/>
11
- # For license information, see LICENSE.TXT
12
-
13
- """BLEU score implementation."""
14
-
15
- import math
16
- import sys
17
- from fractions import Fraction
18
- import warnings
19
- from collections import Counter
20
-
21
- from utils import ngrams
22
- import pdb
23
-
24
-
25
- def sentence_bleu(
26
- references,
27
- hypothesis,
28
- weights=(0.25, 0.25, 0.25, 0.25),
29
- smoothing_function=None,
30
- auto_reweigh=False,
31
- ):
32
- """
33
- Calculate BLEU score (Bilingual Evaluation Understudy) from
34
- Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
35
- "BLEU: a method for automatic evaluation of machine translation."
36
- In Proceedings of ACL. http://www.aclweb.org/anthology/P02-1040.pdf
37
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
38
- ... 'ensures', 'that', 'the', 'military', 'always',
39
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
40
- >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
41
- ... 'forever', 'hearing', 'the', 'activity', 'guidebook',
42
- ... 'that', 'party', 'direct']
43
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
44
- ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
45
- ... 'heed', 'Party', 'commands']
46
- >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
47
- ... 'guarantees', 'the', 'military', 'forces', 'always',
48
- ... 'being', 'under', 'the', 'command', 'of', 'the',
49
- ... 'Party']
50
- >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
51
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
52
- ... 'of', 'the', 'party']
53
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
54
- 0.5045...
55
- If there is no ngrams overlap for any order of n-grams, BLEU returns the
56
- value 0. This is because the precision for the order of n-grams without
57
- overlap is 0, and the geometric mean in the final BLEU score computation
58
- multiplies the 0 with the precision of other n-grams. This results in 0
59
- (independently of the precision of the othe n-gram orders). The following
60
- example has zero 3-gram and 4-gram overlaps:
61
- >>> round(sentence_bleu([reference1, reference2, reference3], hypothesis2),4) # doctest: +ELLIPSIS
62
- 0.0
63
- To avoid this harsh behaviour when no ngram overlaps are found a smoothing
64
- function can be used.
65
- >>> chencherry = SmoothingFunction()
66
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis2,
67
- ... smoothing_function=chencherry.method1) # doctest: +ELLIPSIS
68
- 0.0370...
69
- The default BLEU calculates a score for up to 4-grams using uniform
70
- weights (this is called BLEU-4). To evaluate your translations with
71
- higher/lower order ngrams, use customized weights. E.g. when accounting
72
- for up to 5-grams with uniform weights (this is called BLEU-5) use:
73
- >>> weights = (1./5., 1./5., 1./5., 1./5., 1./5.)
74
- >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights) # doctest: +ELLIPSIS
75
- 0.3920...
76
- :param references: reference sentences
77
- :type references: list(list(str))
78
- :param hypothesis: a hypothesis sentence
79
- :type hypothesis: list(str)
80
- :param weights: weights for unigrams, bigrams, trigrams and so on
81
- :type weights: list(float)
82
- :param smoothing_function:
83
- :type smoothing_function: SmoothingFunction
84
- :param auto_reweigh: Option to re-normalize the weights uniformly.
85
- :type auto_reweigh: bool
86
- :return: The sentence-level BLEU score.
87
- :rtype: float
88
- """
89
- return corpus_bleu(
90
- [references], [hypothesis], weights, smoothing_function, auto_reweigh
91
- )
92
-
93
-
94
- def corpus_bleu(
95
- list_of_references,
96
- hypotheses,
97
- weights=(0.25, 0.25, 0.25, 0.25),
98
- smoothing_function=None,
99
- auto_reweigh=False,
100
- ):
101
- """
102
- Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
103
- the hypotheses and their respective references.
104
- Instead of averaging the sentence level BLEU scores (i.e. marco-average
105
- precision), the original BLEU metric (Papineni et al. 2002) accounts for
106
- the micro-average precision (i.e. summing the numerators and denominators
107
- for each hypothesis-reference(s) pairs before the division).
108
- >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
109
- ... 'ensures', 'that', 'the', 'military', 'always',
110
- ... 'obeys', 'the', 'commands', 'of', 'the', 'party']
111
- >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
112
- ... 'ensures', 'that', 'the', 'military', 'will', 'forever',
113
- ... 'heed', 'Party', 'commands']
114
- >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
115
- ... 'guarantees', 'the', 'military', 'forces', 'always',
116
- ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party']
117
- >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
118
- ... 'army', 'always', 'to', 'heed', 'the', 'directions',
119
- ... 'of', 'the', 'party']
120
- >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
121
- ... 'interested', 'in', 'world', 'history']
122
- >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
123
- ... 'because', 'he', 'read', 'the', 'book']
124
- >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
125
- >>> hypotheses = [hyp1, hyp2]
126
- >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
127
- 0.5920...
128
- The example below show that corpus_bleu() is different from averaging
129
- sentence_bleu() for hypotheses
130
- >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
131
- >>> score2 = sentence_bleu([ref2a], hyp2)
132
- >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
133
- 0.6223...
134
- :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
135
- :type list_of_references: list(list(list(str)))
136
- :param hypotheses: a list of hypothesis sentences
137
- :type hypotheses: list(list(str))
138
- :param weights: weights for unigrams, bigrams, trigrams and so on
139
- :type weights: list(float)
140
- :param smoothing_function:
141
- :type smoothing_function: SmoothingFunction
142
- :param auto_reweigh: Option to re-normalize the weights uniformly.
143
- :type auto_reweigh: bool
144
- :return: The corpus-level BLEU score.
145
- :rtype: float
146
- """
147
- # Before proceeding to compute BLEU, perform sanity checks.
148
-
149
- p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
150
- p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
151
- hyp_lengths, ref_lengths = 0, 0
152
-
153
- assert len(list_of_references) == len(hypotheses), (
154
- "The number of hypotheses and their reference(s) should be the " "same "
155
- )
156
-
157
- # Iterate through each hypothesis and their corresponding references.
158
- for references, hypothesis in zip(list_of_references, hypotheses):
159
- # For each order of ngram, calculate the numerator and
160
- # denominator for the corpus-level modified precision.
161
- for i, _ in enumerate(weights, start=1):
162
- p_i_numeraotr, p_i_denominator = modified_recall(references, hypothesis, i)
163
- p_numerators[i] += p_i_numeraotr
164
- p_denominators[i] += p_i_denominator
165
-
166
- # Calculate the hypothesis length and the closest reference length.
167
- # Adds them to the corpus-level hypothesis and reference counts.
168
- hyp_len = len(hypothesis)
169
- hyp_lengths += hyp_len
170
- ref_lengths += closest_ref_length(references, hyp_len)
171
-
172
- # Calculate corpus-level brevity penalty.
173
- bp = brevity_penalty(ref_lengths, hyp_lengths)
174
-
175
- # Uniformly re-weighting based on maximum hypothesis lengths if largest
176
- # order of n-grams < 4 and weights is set at default.
177
- if auto_reweigh:
178
- if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25):
179
- weights = (1 / hyp_lengths,) * hyp_lengths
180
-
181
- # Collects the various recall values for the different ngram orders.
182
- p_n = [
183
- (p_numerators[i], p_denominators[i])
184
- for i, _ in enumerate(weights, start=1)
185
- ]
186
-
187
- # Returns 0 if there's no matching n-grams
188
- # We only need to check for p_numerators[1] == 0, since if there's
189
- # no unigrams, there won't be any higher order ngrams.
190
- if p_numerators[1] == 0:
191
- return 0
192
-
193
- # If there's no smoothing, set use method0 from SmoothinFunction class.
194
- if not smoothing_function:
195
- smoothing_function = SmoothingFunction().method1
196
- # Smoothen the modified precision.
197
- # Note: smoothing_function() may convert values into floats;
198
- # it tries to retain the Fraction object as much as the
199
- # smoothing method allows.
200
- p_n = smoothing_function(
201
- p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
202
- )
203
- # pdb.set_trace()
204
- s = (w_i * math.log(p_i[0]/p_i[1]) for w_i, p_i in zip(weights, p_n))
205
- s = bp * math.exp(math.fsum(s))
206
- return s
207
-
208
-
209
- def modified_recall(references, hypothesis, n):
210
- """
211
- Calculate modified ngram recall.
212
- :param references: A list of reference translations.
213
- :type references: list(list(str))
214
- :param hypothesis: A hypothesis translation.
215
- :type hypothesis: list(str)
216
- :param n: The ngram order.
217
- :type n: int
218
- :return: BLEU's modified precision for the nth order ngram.
219
- :rtype: Fraction
220
- """
221
- # Extracts all ngrams in hypothesis
222
- # Set an empty Counter if hypothesis is empty.
223
- # pdb.set_trace()
224
- numerator = 0
225
- denominator = 0
226
-
227
- counts = Counter(ngrams(hypothesis, n)) if len(hypothesis) >= n else Counter()
228
- # Extract a union of references' counts.
229
- # max_counts = reduce(or_, [Counter(ngrams(ref, n)) for ref in references])
230
- max_counts = {}
231
- for reference_and_weights in references:
232
- reference = reference_and_weights[0]
233
- weights = reference_and_weights[1]
234
- reference_counts = (
235
- Counter(ngrams(reference, n)) if len(reference) >= n else Counter()
236
- )
237
- # for ngram in reference_counts:
238
- # max_counts[ngram] = max(max_counts.get(ngram, 0), counts[ngram])
239
- clipped_counts = {
240
- ngram: min(count, counts[ngram]) for ngram, count in reference_counts.items()
241
- }
242
- # reweight
243
- if n == 1 and len(weights) == len(reference_counts):
244
- def weighted_sum(weights, counts):
245
- sum_counts = 0
246
- for ngram, count in counts.items():
247
- sum_counts += count * (weights[ngram[0]] if ngram[0] in weights else 1)
248
- return sum_counts
249
-
250
- numerator += weighted_sum(weights, clipped_counts)
251
- denominator += max(1, weighted_sum(weights, reference_counts))
252
-
253
- else:
254
- numerator += sum(clipped_counts.values())
255
- denominator += max(1, sum(reference_counts.values()))
256
-
257
- # # Assigns the intersection between hypothesis and references' counts.
258
- # clipped_counts = {
259
- # ngram: min(count, max_counts[ngram]) for ngram, count in counts.items()
260
- # }
261
-
262
- # numerator += sum(clipped_counts.values())
263
- # # Ensures that denominator is minimum 1 to avoid ZeroDivisionError.
264
- # # Usually this happens when the ngram order is > len(reference).
265
- # denominator += max(1, sum(counts.values()))
266
-
267
- #return Fraction(numerator, denominator, _normalize=False)
268
- return numerator, denominator
269
-
270
-
271
- def closest_ref_length(references, hyp_len):
272
- """
273
- This function finds the reference that is the closest length to the
274
- hypothesis. The closest reference length is referred to as *r* variable
275
- from the brevity penalty formula in Papineni et. al. (2002)
276
- :param references: A list of reference translations.
277
- :type references: list(list(str))
278
- :param hyp_len: The length of the hypothesis.
279
- :type hyp_len: int
280
- :return: The length of the reference that's closest to the hypothesis.
281
- :rtype: int
282
- """
283
- ref_lens = (len(reference) for reference in references)
284
- closest_ref_len = min(
285
- ref_lens, key=lambda ref_len: (abs(ref_len - hyp_len), ref_len)
286
- )
287
- return closest_ref_len
288
-
289
-
290
- def brevity_penalty(closest_ref_len, hyp_len):
291
- """
292
- Calculate brevity penalty.
293
- As the modified n-gram precision still has the problem from the short
294
- length sentence, brevity penalty is used to modify the overall BLEU
295
- score according to length.
296
- An example from the paper. There are three references with length 12, 15
297
- and 17. And a concise hypothesis of the length 12. The brevity penalty is 1.
298
- >>> reference1 = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
299
- >>> reference2 = list('aaaaaaaaaaaaaaa') # i.e. ['a'] * 15
300
- >>> reference3 = list('aaaaaaaaaaaaaaaaa') # i.e. ['a'] * 17
301
- >>> hypothesis = list('aaaaaaaaaaaa') # i.e. ['a'] * 12
302
- >>> references = [reference1, reference2, reference3]
303
- >>> hyp_len = len(hypothesis)
304
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
305
- >>> brevity_penalty(closest_ref_len, hyp_len)
306
- 1.0
307
- In case a hypothesis translation is shorter than the references, penalty is
308
- applied.
309
- >>> references = [['a'] * 28, ['a'] * 28]
310
- >>> hypothesis = ['a'] * 12
311
- >>> hyp_len = len(hypothesis)
312
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
313
- >>> brevity_penalty(closest_ref_len, hyp_len)
314
- 0.2635971381157267
315
- The length of the closest reference is used to compute the penalty. If the
316
- length of a hypothesis is 12, and the reference lengths are 13 and 2, the
317
- penalty is applied because the hypothesis length (12) is less then the
318
- closest reference length (13).
319
- >>> references = [['a'] * 13, ['a'] * 2]
320
- >>> hypothesis = ['a'] * 12
321
- >>> hyp_len = len(hypothesis)
322
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
323
- >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
324
- 0.9200...
325
- The brevity penalty doesn't depend on reference order. More importantly,
326
- when two reference sentences are at the same distance, the shortest
327
- reference sentence length is used.
328
- >>> references = [['a'] * 13, ['a'] * 11]
329
- >>> hypothesis = ['a'] * 12
330
- >>> hyp_len = len(hypothesis)
331
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
332
- >>> bp1 = brevity_penalty(closest_ref_len, hyp_len)
333
- >>> hyp_len = len(hypothesis)
334
- >>> closest_ref_len = closest_ref_length(reversed(references), hyp_len)
335
- >>> bp2 = brevity_penalty(closest_ref_len, hyp_len)
336
- >>> bp1 == bp2 == 1
337
- True
338
- A test example from mteval-v13a.pl (starting from the line 705):
339
- >>> references = [['a'] * 11, ['a'] * 8]
340
- >>> hypothesis = ['a'] * 7
341
- >>> hyp_len = len(hypothesis)
342
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
343
- >>> brevity_penalty(closest_ref_len, hyp_len) # doctest: +ELLIPSIS
344
- 0.8668...
345
- >>> references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
346
- >>> hypothesis = ['a'] * 7
347
- >>> hyp_len = len(hypothesis)
348
- >>> closest_ref_len = closest_ref_length(references, hyp_len)
349
- >>> brevity_penalty(closest_ref_len, hyp_len)
350
- 1.0
351
- :param hyp_len: The length of the hypothesis for a single sentence OR the
352
- sum of all the hypotheses' lengths for a corpus
353
- :type hyp_len: int
354
- :param closest_ref_len: The length of the closest reference for a single
355
- hypothesis OR the sum of all the closest references for every hypotheses.
356
- :type closest_ref_len: int
357
- :return: BLEU's brevity penalty.
358
- :rtype: float
359
- """
360
- if hyp_len > closest_ref_len:
361
- return 1
362
- # If hypothesis is empty, brevity penalty = 0 should result in BLEU = 0.0
363
- elif hyp_len == 0:
364
- return 0
365
- else:
366
- return math.exp(1 - closest_ref_len / hyp_len)
367
-
368
-
369
- class SmoothingFunction:
370
- """
371
- This is an implementation of the smoothing techniques
372
- for segment-level BLEU scores that was presented in
373
- Boxing Chen and Collin Cherry (2014) A Systematic Comparison of
374
- Smoothing Techniques for Sentence-Level BLEU. In WMT14.
375
- http://acl2014.org/acl2014/W14-33/pdf/W14-3346.pdf
376
- """
377
-
378
- def __init__(self, epsilon=0.1, alpha=5, k=5):
379
- """
380
- This will initialize the parameters required for the various smoothing
381
- techniques, the default values are set to the numbers used in the
382
- experiments from Chen and Cherry (2014).
383
- >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', 'ensures',
384
- ... 'that', 'the', 'military', 'always', 'obeys', 'the',
385
- ... 'commands', 'of', 'the', 'party']
386
- >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', 'ensures',
387
- ... 'that', 'the', 'military', 'will', 'forever', 'heed',
388
- ... 'Party', 'commands']
389
- >>> chencherry = SmoothingFunction()
390
- >>> print(sentence_bleu([reference1], hypothesis1)) # doctest: +ELLIPSIS
391
- 0.4118...
392
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method0)) # doctest: +ELLIPSIS
393
- 0.4118...
394
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method1)) # doctest: +ELLIPSIS
395
- 0.4118...
396
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method2)) # doctest: +ELLIPSIS
397
- 0.4489...
398
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method3)) # doctest: +ELLIPSIS
399
- 0.4118...
400
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method4)) # doctest: +ELLIPSIS
401
- 0.4118...
402
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method5)) # doctest: +ELLIPSIS
403
- 0.4905...
404
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method6)) # doctest: +ELLIPSIS
405
- 0.4135...
406
- >>> print(sentence_bleu([reference1], hypothesis1, smoothing_function=chencherry.method7)) # doctest: +ELLIPSIS
407
- 0.4905...
408
- :param epsilon: the epsilon value use in method 1
409
- :type epsilon: float
410
- :param alpha: the alpha value use in method 6
411
- :type alpha: int
412
- :param k: the k value use in method 4
413
- :type k: int
414
- """
415
- self.epsilon = epsilon
416
- self.alpha = alpha
417
- self.k = k
418
-
419
- def method0(self, p_n, *args, **kwargs):
420
- """
421
- No smoothing.
422
- """
423
- p_n_new = []
424
- for i, p_i in enumerate(p_n):
425
- if p_i[0] != 0:
426
- p_n_new.append(p_i)
427
- else:
428
- _msg = str(
429
- "\nThe hypothesis contains 0 counts of {}-gram overlaps.\n"
430
- "Therefore the BLEU score evaluates to 0, independently of\n"
431
- "how many N-gram overlaps of lower order it contains.\n"
432
- "Consider using lower n-gram order or use "
433
- "SmoothingFunction()"
434
- ).format(i + 1)
435
- warnings.warn(_msg)
436
- # When numerator==0 where denonminator==0 or !=0, the result
437
- # for the precision score should be equal to 0 or undefined.
438
- # Due to BLEU geometric mean computation in logarithm space,
439
- # we we need to take the return sys.float_info.min such that
440
- # math.log(sys.float_info.min) returns a 0 precision score.
441
- p_n_new.append(sys.float_info.min)
442
- return p_n_new
443
-
444
- def method1(self, p_n, *args, **kwargs):
445
- """
446
- Smoothing method 1: Add *epsilon* counts to precision with 0 counts.
447
- """
448
- return [
449
- ((p_i[0] + self.epsilon), p_i[1])
450
- if p_i[0] == 0
451
- else p_i
452
- for p_i in p_n
453
- ]
454
-
455
- def method2(self, p_n, *args, **kwargs):
456
- """
457
- Smoothing method 2: Add 1 to both numerator and denominator from
458
- Chin-Yew Lin and Franz Josef Och (2004) Automatic evaluation of
459
- machine translation quality using longest common subsequence and
460
- skip-bigram statistics. In ACL04.
461
- """
462
- return [
463
- (p_i[0] + 1, p_i[1] + 1)
464
- for p_i in p_n
465
- ]
466
-
467
- def method3(self, p_n, *args, **kwargs):
468
- """
469
- Smoothing method 3: NIST geometric sequence smoothing
470
- The smoothing is computed by taking 1 / ( 2^k ), instead of 0, for each
471
- precision score whose matching n-gram count is null.
472
- k is 1 for the first 'n' value for which the n-gram match count is null/
473
- For example, if the text contains:
474
- - one 2-gram match
475
- - and (consequently) two 1-gram matches
476
- the n-gram count for each individual precision score would be:
477
- - n=1 => prec_count = 2 (two unigrams)
478
- - n=2 => prec_count = 1 (one bigram)
479
- - n=3 => prec_count = 1/2 (no trigram, taking 'smoothed' value of 1 / ( 2^k ), with k=1)
480
- - n=4 => prec_count = 1/4 (no fourgram, taking 'smoothed' value of 1 / ( 2^k ), with k=2)
481
- """
482
- incvnt = 1 # From the mteval-v13a.pl, it's referred to as k.
483
- for i, p_i in enumerate(p_n):
484
- if p_i.numerator == 0:
485
- p_n[i] = 1 / (2 ** incvnt * p_i.denominator)
486
- incvnt += 1
487
- return p_n
488
-
489
- def method4(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
490
- """
491
- Smoothing method 4:
492
- Shorter translations may have inflated precision values due to having
493
- smaller denominators; therefore, we give them proportionally
494
- smaller smoothed counts. Instead of scaling to 1/(2^k), Chen and Cherry
495
- suggests dividing by 1/ln(len(T)), where T is the length of the translation.
496
- """
497
- hyp_len = hyp_len if hyp_len else len(hypothesis)
498
- for i, p_i in enumerate(p_n):
499
- if p_i.numerator == 0 and hyp_len != 0:
500
- incvnt = i + 1 * self.k / math.log(
501
- hyp_len
502
- ) # Note that this K is different from the K from NIST.
503
- p_n[i] = incvnt / p_i.denominator
504
- return p_n
505
-
506
- def method5(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
507
- """
508
- Smoothing method 5:
509
- The matched counts for similar values of n should be similar. To a
510
- calculate the n-gram matched count, it averages the n−1, n and n+1 gram
511
- matched counts.
512
- """
513
- hyp_len = hyp_len if hyp_len else len(hypothesis)
514
- m = {}
515
- # Requires an precision value for an addition ngram order.
516
- p_n_plus1 = p_n + [modified_precision(references, hypothesis, 5)]
517
- m[-1] = p_n[0] + 1
518
- for i, p_i in enumerate(p_n):
519
- p_n[i] = (m[i - 1] + p_i + p_n_plus1[i + 1]) / 3
520
- m[i] = p_n[i]
521
- return p_n
522
-
523
- def method6(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
524
- """
525
- Smoothing method 6:
526
- Interpolates the maximum likelihood estimate of the precision *p_n* with
527
- a prior estimate *pi0*. The prior is estimated by assuming that the ratio
528
- between pn and pn−1 will be the same as that between pn−1 and pn−2; from
529
- Gao and He (2013) Training MRF-Based Phrase Translation Models using
530
- Gradient Ascent. In NAACL.
531
- """
532
- hyp_len = hyp_len if hyp_len else len(hypothesis)
533
- # This smoothing only works when p_1 and p_2 is non-zero.
534
- # Raise an error with an appropriate message when the input is too short
535
- # to use this smoothing technique.
536
- assert p_n[2], "This smoothing method requires non-zero precision for bigrams."
537
- for i, p_i in enumerate(p_n):
538
- if i in [0, 1]: # Skips the first 2 orders of ngrams.
539
- continue
540
- else:
541
- pi0 = 0 if p_n[i - 2] == 0 else p_n[i - 1] ** 2 / p_n[i - 2]
542
- # No. of ngrams in translation that matches the reference.
543
- m = p_i.numerator
544
- # No. of ngrams in translation.
545
- l = sum(1 for _ in ngrams(hypothesis, i + 1))
546
- # Calculates the interpolated precision.
547
- p_n[i] = (m + self.alpha * pi0) / (l + self.alpha)
548
- return p_n
549
-
550
- def method7(self, p_n, references, hypothesis, hyp_len=None, *args, **kwargs):
551
- """
552
- Smoothing method 7:
553
- Interpolates methods 4 and 5.
554
- """
555
- hyp_len = hyp_len if hyp_len else len(hypothesis)
556
- p_n = self.method4(p_n, references, hypothesis, hyp_len)
557
- p_n = self.method5(p_n, references, hypothesis, hyp_len)
558
- return p_n
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
codebleu.py → metric-codebleu.py RENAMED
@@ -11,55 +11,56 @@
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
- """TODO: Add a description here."""
15
 
16
- import evaluate
17
  import datasets
18
- import eval
19
- import os
20
 
21
 
22
- # TODO: Add BibTeX citation
23
  _CITATION = """\
24
- @misc{2009.10297,
25
- Author = {Shuo Ren and Daya Guo and Shuai Lu and Long Zhou and Shujie Liu and Duyu Tang and Neel Sundaresan and Ming Zhou and Ambrosio Blanco and Shuai Ma},
26
- Title = {CodeBLEU: a Method for Automatic Evaluation of Code Synthesis},
27
- Year = {2020},
28
- Eprint = {arXiv:2009.10297},
 
 
29
  }
30
  """
31
 
32
- # TODO: Add description of the module here
33
  _DESCRIPTION = """\
34
- This new module is designed to calculate the CodeBLEU score for code generation tasks.
35
  """
36
 
37
 
38
- # TODO: Add description of the arguments of the module here
39
  _KWARGS_DESCRIPTION = """
40
- Calculates how good are predictions given some references, using certain scores
41
  Args:
42
- predictions: list of predictions to score.
43
- references: list of reference for each prediction.
 
 
 
 
44
  Returns:
45
- ngram_match_score
46
- weighted_ngram_match_score
47
- syntax_match_score
48
- dataflow_match_score
49
- code_bleu_score
50
  Examples:
51
- Examples should be written in doctest format, and should illustrate how
52
- to use the function.
53
-
54
- >>> my_new_module = evaluate.load("my_new_module")
55
- >>> results = my_new_module.compute(references=["def add(a, b): return a + b"], predictions=["def add(a, b): return a + b"])
56
  >>> print(results)
57
- {'ngram_match_score': 1.0, 'weighted_ngram_match_score': 1.0, 'syntax_match_score': 1.0, 'dataflow_match_score': 1.0, 'code_bleu_score': 1.0}
58
  """
59
 
60
 
61
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
62
  class codebleu(evaluate.Metric):
 
 
63
  def _info(self):
64
  return evaluate.MetricInfo(
65
  # This is the description that will appear on the modules page.
@@ -68,15 +69,38 @@ class codebleu(evaluate.Metric):
68
  citation=_CITATION,
69
  inputs_description=_KWARGS_DESCRIPTION,
70
  # This defines the format of each prediction and reference
71
- features=datasets.Features({
72
- 'predictions': datasets.Value('string'),
73
- 'references': datasets.Value('string'),
74
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  )
76
 
77
- def _download_and_prepare(self, dl_manager):
78
- pass
79
 
80
- def _compute(self, predictions, references):
 
 
 
 
 
81
  """Returns the scores"""
82
- return eval.code_bleu.calc(predictions, references)
 
 
 
 
 
 
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
 
14
 
15
+ from codebleu import calc_codebleu
16
  import datasets
17
+ import evaluate
 
18
 
19
 
 
20
# BibTeX entry for the CodeBLEU paper (Ren et al., 2020, arXiv:2009.10297);
# surfaced by `evaluate` on the metric's hub page via MetricInfo.citation.
_CITATION = """\
@misc{ren2020codebleu,
      title={CodeBLEU: a Method for Automatic Evaluation of Code Synthesis},
      author={Shuo Ren and Daya Guo and Shuai Lu and Long Zhou and Shujie Liu and Duyu Tang and Neel Sundaresan and Ming Zhou and Ambrosio Blanco and Shuai Ma},
      year={2020},
      eprint={2009.10297},
      archivePrefix={arXiv},
      primaryClass={cs.SE}
}
"""
30
 
 
31
# One-line summary shown on the evaluate module page (MetricInfo.description).
# "Unofficial" refers to the `codebleu` PyPI package this module wraps, as
# opposed to the original CodeXGLUE evaluator scripts.
_DESCRIPTION = """\
Unofficial `CodeBLEU` implementation that supports Linux and MacOS.
"""
34
 
35
 
 
36
  _KWARGS_DESCRIPTION = """
37
+ Calculate a weighted combination of `n-gram match (BLEU)`, `weighted n-gram match (BLEU-weighted)`, `AST match` and `data-flow match` scores.
38
  Args:
39
+ predictions: list of predictions to score. Each predictions
40
+ should be a string with tokens separated by spaces.
41
+ references: list of reference for each prediction. Each
42
+ reference should be a string with tokens separated by spaces.
43
+ language: programming language in ['java','js','c_sharp','php','c','python','cpp'].
44
+ weights: tuple of 4 floats to use as weights for scores. Defaults to (0.25, 0.25, 0.25, 0.25).
45
  Returns:
46
+ codebleu: resulting `CodeBLEU` score,
47
+ ngram_match_score: resulting `n-gram match (BLEU)` score,
48
+ weighted_ngram_match_score: resulting `weighted n-gram match (BLEU-weighted)` score,
49
+ syntax_match_score: resulting `AST match` score,
50
+ dataflow_match_score: resulting `data-flow match` score,
51
  Examples:
52
+ >>> metric = evaluate.load("k4black/codebleu")
53
+ >>> ref = "def sum ( first , second ) :\n return second + first"
54
+ >>> pred = "def add ( a , b ) :\n return a + b"
55
+ >>> results = metric.compute(references=[ref], predictions=[pred], language="python")
 
56
  >>> print(results)
 
57
  """
58
 
59
 
60
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
61
  class codebleu(evaluate.Metric):
62
+ """CodeBLEU metric from CodexGLUE"""
63
+
64
  def _info(self):
65
  return evaluate.MetricInfo(
66
  # This is the description that will appear on the modules page.
 
69
  citation=_CITATION,
70
  inputs_description=_KWARGS_DESCRIPTION,
71
  # This defines the format of each prediction and reference
72
+ features=[
73
+ datasets.Features(
74
+ {
75
+ "predictions": datasets.Value("string", id="sequence"),
76
+ "references": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
77
+ "lang": datasets.Value("string"),
78
+ "weights": datasets.Value("string")
79
+ }
80
+ )
81
+ ],
82
+ # Homepage of the module for documentation
83
+ homepage="https://github.com/k4black/codebleu",
84
+ # Additional links to the codebase or references
85
+ codebase_urls=["https://github.com/k4black/codebleu"],
86
+ reference_urls=[
87
+ "https://github.com/k4black/codebleu",
88
+ "https://github.com/microsoft/CodeXGLUE/tree/main/Code-Code/code-to-code-trans/evaluator",
89
+ "https://arxiv.org/abs/2009.10297",
90
+ ],
91
  )
92
 
 
 
93
 
94
+ def _compute(
95
+ self,
96
+ predictions,
97
+ references,
98
+ lang,weights=(0.25, 0.25, 0.25, 0.25)
99
+ ):
100
  """Returns the scores"""
101
+ return calc_codebleu(
102
+ references=references,
103
+ predictions=predictions,
104
+ lang=lang,
105
+ weights=weights
106
+ )
requirements.txt CHANGED
@@ -1 +1,2 @@
1
- git+https://github.com/huggingface/evaluate@main
 
 
1
+ git+https://github.com/huggingface/evaluate@main
2
+ codebleu>=0.2.0,<1.0.0