Source code for math_util

"""Commonly used functions not available in the Python2 standard library."""
from __future__ import division

import math
import random

from sys import float_info

# Fourth root of the machine epsilon for floats (epsilon ** 0.25), which is
# roughly 1.2e-4 when floats are IEEE-754 doubles. Not referenced elsewhere in
# this module; presumably a comparison tolerance for callers - TODO confirm.
NORM_EPSILON = math.pow(float_info.epsilon, 0.25) # half-precision works for machine learning

def mean(values):
    """
    Returns the arithmetic mean of the input values as a float.

    Accepts any iterable; each item is converted with ``float`` and the
    total is accumulated with ``math.fsum``. An empty input raises
    ZeroDivisionError.
    """
    as_floats = [float(item) for item in values]
    total = math.fsum(as_floats)
    return total / len(as_floats)

def median(values):
    """
    Returns the middle element of the sorted input values.

    For an even number of inputs this is the upper of the two middle
    elements (no averaging is done); see median2 for the averaged form.
    An empty input raises IndexError.
    """
    ordered = sorted(values)
    middle = len(ordered) // 2
    return ordered[middle]

def median2(values):
    """
    Returns the median of the input values;
    if there are an even number of inputs, returns the mean of the middle two.

    With one or two inputs the result is simply their mean (a float).
    With an odd count of three or more the middle element is returned
    unchanged, without conversion to float.
    """
    items = list(values)
    count = len(items)
    if count <= 2:
        # Zero, one or two values: the median coincides with the mean.
        return mean(items)
    ordered = sorted(items)
    upper, is_odd = divmod(count, 2)
    if is_odd == 1:
        return ordered[upper]
    return (ordered[upper - 1] + ordered[upper])/2.0

def tmean(values, trim=0.25):
    """
    Returns the trimmed mean of the input values,
    with the fraction trimmed from each end being the second argument;
    requires 0.0 <= trim <= 0.5. If ``trim`` is over 0.25, returns the
    weighted mean of tmean(values, 0.25) and median2(values).

    When ``trim * len(values)`` is not a whole number, the whole part is
    dropped from each end of the sorted values and the outermost surviving
    value at each end is kept with weight ``1 - fractional part``, so the
    result varies continuously with ``trim``.

    Special cases: fewer than three values, or a falsy ``trim`` (0, 0.0,
    None), give the plain mean; ``trim == 0.5`` gives median2(values).

    Raises ValueError if ``trim`` is outside the 0.0 - 0.5 range (only
    checked when at least three values are supplied).
    """
    values = list(values)
    if (len(values) < 3) or (not trim):
        return mean(values)
    if trim == 0.5:
        return median2(values)
    if not (0.0 < trim < 0.5):
        raise ValueError(
            "Trim must be in 0.0 - 0.5 range, not {0!r}".format(trim))
    values.sort()
    if trim > 0.25: # trimming more than 50% of the values does not make much sense
        # Blend linearly from tmean(values, 0.25) at trim=0.25
        # to median2(values) at trim=0.5.
        prop_trim = (0.5 - trim)/0.25
        return (prop_trim*tmean(values, 0.25)) + ((1.0 - prop_trim)*median2(values))

    trim_exact = trim*len(values)            # elements to remove per end (may be fractional)
    trim_fully = int(math.floor(trim_exact))  # elements removed outright per end
    trim_fraction = trim_exact - trim_fully   # share still to remove from each new end value
    if trim_fully:
        values = values[trim_fully:-trim_fully]
    if len(values) < 3:
        return mean(values)

    if trim_fraction > 0.0:
        # The end values lose ``trim_fraction`` of their weight and keep the rest.
        edge_weight = 1.0 - trim_fraction
        weighted = [float(v) for v in values[1:-1]]
        weighted.append(values[0]*edge_weight)
        weighted.append(values[-1]*edge_weight)
        total_weight = (len(values) - 2) + 2*edge_weight
    else:
        weighted = [float(v) for v in values]
        total_weight = len(values)
    return math.fsum(weighted)/total_weight

def variance(values):
    """
    Returns the population variance of the input values: the mean of the
    squared deviations from the mean, dividing by the number of values
    (not by one less than it).
    """
    samples = list(values)
    center = mean(samples)
    squared_deviations = [(sample - center) ** 2 for sample in samples]
    return math.fsum(squared_deviations) / len(samples)


def stdev(values):
    """
    Returns the standard deviation of the input values, computed as the
    square root of variance(values).
    """
    spread = variance(values)
    return math.sqrt(spread)


def softmax(values):
    """
    Compute the softmax of the given value set, v_i = exp(v_i) / s,
    where s = sum(exp(v_0), exp(v_1), ..).

    The largest input is subtracted from every value before
    exponentiating. This leaves the result mathematically unchanged but
    keeps ``math.exp`` from overflowing on large inputs and keeps the
    denominator from underflowing to zero when all inputs are very
    negative.

    Accepts any iterable and returns a list of floats in input order.
    An empty input yields an empty list.
    """
    values = list(values)
    if not values:
        return []
    peak = max(values)
    # Every exponent is <= 0, so each term lies in [0, 1] and the term for
    # the largest input is exactly 1.0; the sum is therefore at least 1.
    e_values = [math.exp(v - peak) for v in values]
    s = math.fsum(e_values)
    return [ev / s for ev in e_values]

def random_proportional_selection(freqs, max_freq=None):
    """
    Roulette selection - see http://jbn.github.io/fast_proportional_selection/ for analysis

    Returns a random index into ``freqs``, chosen with probability
    proportional to the frequency stored at that index. ``max_freq`` may
    be supplied to skip recomputing ``max(freqs)``.
    """
    count = len(freqs)

    # One or two candidates are resolved directly, with no rejection loop.
    if count == 1:
        return 0
    if count == 2:
        first_share = float(freqs[0])/sum(freqs)
        return 0 if random.random() < first_share else 1

    if max_freq is None:
        max_freq = max(freqs)

    # Largest and smallest frequency both equal to 1: pick uniformly.
    if max_freq == 1 and min(freqs) == 1:
        return random.choice(range(count))

    ceiling = float(max_freq)

    # Rejection sampling: draw a uniform candidate index and accept it with
    # probability freqs[candidate] / ceiling.
    while True:
        candidate = int(count * random.random())
        acceptance = freqs[candidate] / ceiling
        if random.random() < acceptance:
            return candidate


# Name -> function lookup for the aggregations that reduce a collection of
# values to a single value.
stat_functions = {
    'min': min,
    'max': max,
    'mean': mean,
    'median': median,
    'median2': median2,
    'tmean': tmean,
}