java.lang.Object

org.tribuo.util.Util

public final class Util extends Object

Ye olde util class.

Basically full of vector and RNG operations.

Method Summary

Modifier and Type

Method

Description

static <T extends Comparable<T>> com.oracle.labs.mlrg.olcut.util.Pair<Integer,T>

argmax(List<T> values)

Find the index of the maximum value in a list.

static <T extends Comparable<T>> com.oracle.labs.mlrg.olcut.util.Pair<Integer,T>

argmin(List<T> values)

Find the index of the minimum value in a list.

static double

auc(double[] x, double[] y)

Calculates the area under the curve, bounded below by the x axis.

static <T> int

binarySearch(List<? extends Comparable<? super T>> list, T key)

A binary search function.

static <T> int

binarySearch(List<? extends Comparable<? super T>> list, T key, int low, int high)

A binary search function.

static <T> int

binarySearch(List<? extends T> list, int key, ToIntFunction<T> extractionFunc)

A binary search function.

static int[]

cumulativeSum(boolean[] input)

Produces a cumulative sum array.

static double[]

cumulativeSum(double[] input)

Produces a cumulative sum array.

static int[]

differencesIndices(double[] input)

Returns an array containing the indices where values are different.

static int[]

differencesIndices(double[] input, double tolerance)

Returns an array containing the indices where values are different.

static String

formatDuration(long startMillis, long stopMillis)

Formats a duration given two times in milliseconds.

static int[]

generateBootstrapIndices(int size, Random rng)

Draws a bootstrap sample of indices.

static int[]

generateBootstrapIndices(int size, SplittableRandom rng)

Draws a bootstrap sample of indices.

static double[]

generateCDF(double[] pmf)

Generates a cumulative distribution function from the supplied probability mass function.

static double[]

generateCDF(float[] pmf)

Generates a cumulative distribution function from the supplied probability mass function.

static double[]

generateCDF(long[] counts, long countSum)

Generates a cumulative distribution function from the supplied frequency counts.

static float[]

generateUniformFloatVector(int length, float value)

static double[]

generateUniformVector(int length, double value)

static float[]

generateUniformVector(int length, float value)

static int[]

generateWeightedIndicesSample(int size, double[] weights, Random rng)

Generates a sample of indices weighted by the provided weights.

static int[]

generateWeightedIndicesSample(int size, double[] weights, SplittableRandom rng)

Generates a sample of indices weighted by the provided weights.

static int[]

generateWeightedIndicesSample(int size, float[] weights, Random rng)

Generates a sample of indices weighted by the provided weights.

static int[]

generateWeightedIndicesSample(int size, float[] weights, SplittableRandom rng)

Generates a sample of indices weighted by the provided weights.

static int[]

generateWeightedIndicesSampleWithoutReplacement(int size, double[] weights, Random rng)

Generates a sample of indices weighted by the provided weights without replacement.

static int[]

generateWeightedIndicesSampleWithoutReplacement(int size, float[] weights, Random rng)

Generates a sample of indices weighted by the provided weights without replacement.

static void

inPlaceAdd(double[] input, double[] update)

static void

inPlaceAdd(float[] input, float[] update)

static double[]

inplaceNormalizeToDistribution(double[] input)

static void

inplaceNormalizeToDistribution(float[] input)

static void

inPlaceSubtract(double[] input, double[] update)

static void

inPlaceSubtract(float[] input, float[] update)

static void

logVector(Logger otherLogger, Level level, double[] input)

static void

logVector(Logger otherLogger, Level level, float[] input)

static double

mean(double[] inputs)

Returns the mean of the input array.

static double

mean(double[] array, int length)

static <V extends Number> double

mean(Collection<V> values)

static com.oracle.labs.mlrg.olcut.util.Pair<Double,Double>

meanAndVariance(double[] inputs)

Returns the mean and variance of the input.

static com.oracle.labs.mlrg.olcut.util.Pair<Double,Double>

meanAndVariance(double[] inputs, int length)

Returns the mean and variance of the input's first length elements.

static double[]

normalizeToDistribution(double[] input)

static double[]

normalizeToDistribution(float[] input)

static int[]

randperm(int size, Random rng)

Shuffles the indices in the range [0,size).

static int[]

randperm(int size, SplittableRandom rng)

Shuffles the indices in the range [0,size).

static void

randpermInPlace(double[] input, SplittableRandom rng)

Shuffles the input.

static void

randpermInPlace(int[] input, Random rng)

Shuffles the input.

static void

randpermInPlace(int[] input, SplittableRandom rng)

Shuffles the input.

static int

sampleFromCDF(double[] cdf, Random rng)

Samples an index from the supplied cdf.

static int

sampleFromCDF(double[] cdf, SplittableRandom rng)

Samples an index from the supplied cdf.

static int[]

sampleInts(Random rng, int size, int range)

static <V extends Number> double

sampleStandardDeviation(Collection<V> values)

static <V extends Number> double

sampleVariance(Collection<V> values)

static int[]

sortedDifference(int[] first, int[] second)

Expects sorted input arrays.

static double[]

standardize(double[] input, double mean, double variance)

Standardizes the input so it has zero mean and unit variance, i.e., subtracts the mean and divides by the variance.

static void

standardizeInPlace(double[] input, double mean, double variance)

Standardizes the input so it has zero mean and unit variance, i.e., subtracts the mean and divides by the variance.

static double

sum(double[] input)

static double

sum(double[] array, int length)

static float

sum(float[] input)

static float

sum(float[] array, int length)

static float

sum(int[] indices, float[] input)

static float

sum(int[] indices, int indicesLength, float[] input)

static double[]

toDoubleArray(float[] floats)

Convert an array of floats to an array of doubles.

static float[]

toFloatArray(double[] doubles)

Convert an array of doubles to an array of floats.

static double[]

toPrimitiveDouble(List<Double> input)

static double[]

toPrimitiveDoubleFromInteger(List<Integer> input)

static float[]

toPrimitiveFloat(List<Float> input)

static int[]

toPrimitiveInt(List<Integer> input)

static long[]

toPrimitiveLong(List<Long> input)

static double

vectorNorm(double[] input)

static double

weightedMean(double[] inputs, double[] weights)

Returns the weighted mean of the input.

static double

weightedMean(double[] array, float[] weights, int length)

static double

weightedSum(double[] array, float[] weights, int length)

Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait

Method Details
- argmax
  
  public static <T extends Comparable<T>> com.oracle.labs.mlrg.olcut.util.Pair<Integer,T> argmax(List<T> values)
  
  Find the index of the maximum value in a list.
  
  Type Parameters:
  
  T - the type of the values (must implement Comparable)
  
  Parameters:
  
  values - list
  
  Returns:
  
  a pair: (index of the max value, max value)
- argmin
  
  public static <T extends Comparable<T>> com.oracle.labs.mlrg.olcut.util.Pair<Integer,T> argmin(List<T> values)
  
  Find the index of the minimum value in a list.
  
  Type Parameters:
  
  T - the type of the values (must implement Comparable)
  
  Parameters:
  
  values - list
  
  Returns:
  
  a pair: (index of the min value, min value)
- toFloatArray
  
  public static float[] toFloatArray(double[] doubles)
  
  Convert an array of doubles to an array of floats.
  
  Parameters:
  
  doubles - The array of doubles to convert.
  
  Returns:
  
  An array of floats.
- toDoubleArray
  
  public static double[] toDoubleArray(float[] floats)
  
  Convert an array of floats to an array of doubles.
  
  Parameters:
  
  floats - The array of floats to convert.
  
  Returns:
  
  An array of doubles.
- randperm
  
  public static int[] randperm(int size, Random rng)
  
  Shuffles the indices in the range [0,size).
  
  Parameters:
  
  size - The number of elements.
  
  rng - The random number generator to use.
  
  Returns:
  
  A random permutation of the integers in the range [0, size).
- randperm
  
  public static int[] randperm(int size, SplittableRandom rng)
  
  Shuffles the indices in the range [0,size).
  
  Parameters:
  
  size - The number of elements.
  
  rng - The random number generator to use.
  
  Returns:
  
  A random permutation of the integers in the range [0, size).
- randpermInPlace
  
  public static void randpermInPlace(int[] input, Random rng)
  
  Shuffles the input.
  
  Parameters:
  
  input - The array to shuffle.
  
  rng - The random number generator to use.
- randpermInPlace
  
  public static void randpermInPlace(int[] input, SplittableRandom rng)
  
  Shuffles the input.
  
  Parameters:
  
  input - The array to shuffle.
  
  rng - The random number generator to use.
- randpermInPlace
  
  public static void randpermInPlace(double[] input, SplittableRandom rng)
  
  Shuffles the input.
  
  Parameters:
  
  input - The array to shuffle.
  
  rng - The random number generator to use.
- generateBootstrapIndices
  
  public static int[] generateBootstrapIndices(int size, Random rng)
  
  Draws a bootstrap sample of indices.
  
  Parameters:
  
  size - Size of the sample to generate.
  
  rng - The RNG to use.
  
  Returns:
  
  A bootstrap sample.
- generateBootstrapIndices
  
  public static int[] generateBootstrapIndices(int size, SplittableRandom rng)
  
  Draws a bootstrap sample of indices.
  
  Parameters:
  
  size - Size of the sample to generate.
  
  rng - The RNG to use.
  
  Returns:
  
  A bootstrap sample.
- generateWeightedIndicesSample
  
  public static int[] generateWeightedIndicesSample(int size, double[] weights, Random rng)
  
  Generates a sample of indices weighted by the provided weights.
  
  Parameters:
  
  size - Size of the sample to generate.
  
  weights - A probability mass function of weights.
  
  rng - The RNG to use.
  
  Returns:
  
  A sample with replacement from weights.
- generateWeightedIndicesSample
  
  public static int[] generateWeightedIndicesSample(int size, float[] weights, Random rng)
  
  Generates a sample of indices weighted by the provided weights.
  
  Parameters:
  
  size - Size of the sample to generate.
  
  weights - A probability mass function of weights.
  
  rng - The RNG to use.
  
  Returns:
  
  A sample with replacement from weights.
- generateWeightedIndicesSample
  
  public static int[] generateWeightedIndicesSample(int size, double[] weights, SplittableRandom rng)
  
  Generates a sample of indices weighted by the provided weights.
  
  Parameters:
  
  size - Size of the sample to generate.
  
  weights - A probability mass function of weights.
  
  rng - The RNG to use.
  
  Returns:
  
  A sample with replacement from weights.
- generateWeightedIndicesSample
  
  public static int[] generateWeightedIndicesSample(int size, float[] weights, SplittableRandom rng)
  
  Generates a sample of indices weighted by the provided weights.
  
  Parameters:
  
  size - Size of the sample to generate.
  
  weights - A probability mass function of weights.
  
  rng - The RNG to use.
  
  Returns:
  
  A sample with replacement from weights.
- generateWeightedIndicesSampleWithoutReplacement
  
  public static int[] generateWeightedIndicesSampleWithoutReplacement(int size, double[] weights, Random rng)
  
  Generates a sample of indices weighted by the provided weights without replacement. Does not recalculate proportions in-between samples. Use judiciously.
  
  Parameters:
  
  size - Size of the sample to generate
  
  weights - A probability mass function of weights
  
  rng - The RNG to use
  
  Returns:
  
  A sample without replacement from weights
- generateWeightedIndicesSampleWithoutReplacement
  
  public static int[] generateWeightedIndicesSampleWithoutReplacement(int size, float[] weights, Random rng)
  
  Generates a sample of indices weighted by the provided weights without replacement. Does not recalculate proportions in-between samples. Use judiciously.
  
  Parameters:
  
  size - Size of the sample to generate
  
  weights - A probability mass function of weights
  
  rng - The RNG to use
  
  Returns:
  
  A sample without replacement from weights
- generateCDF
  
  public static double[] generateCDF(double[] pmf)
  
  Generates a cumulative distribution function from the supplied probability mass function.
  
  Parameters:
  
  pmf - The probability mass function (i.e., the probability distribution).
  
  Returns:
  
  The CDF.
- cumulativeSum
  
  public static double[] cumulativeSum(double[] input)
  
  Produces a cumulative sum array.
  
  Parameters:
  
  input - The input to sum.
  
  Returns:
  
  The cumulative sum.
- cumulativeSum
  
  public static int[] cumulativeSum(boolean[] input)
  
  Produces a cumulative sum array.
  
  Parameters:
  
  input - The input to sum.
  
  Returns:
  
  The cumulative sum.
- generateCDF
  
  public static double[] generateCDF(float[] pmf)
  
  Generates a cumulative distribution function from the supplied probability mass function.
  
  Parameters:
  
  pmf - The probability mass function (i.e., the probability distribution).
  
  Returns:
  
  The CDF.
- generateCDF
  
  public static double[] generateCDF(long[] counts, long countSum)
  
  Generates a cumulative distribution function from the supplied frequency counts.
  
  Parameters:
  
  counts - The frequency counts.
  
  countSum - The sum of the counts.
  
  Returns:
  
  The CDF.
- sampleFromCDF
  
  public static int sampleFromCDF(double[] cdf, Random rng)
  
  Samples an index from the supplied cdf.
  
  Parameters:
  
  cdf - The cdf to sample from.
  
  rng - The rng to use.
  
  Returns:
  
  A sample.
- sampleFromCDF
  
  public static int sampleFromCDF(double[] cdf, SplittableRandom rng)
  
  Samples an index from the supplied cdf.
  
  Parameters:
  
  cdf - The cdf to sample from.
  
  rng - The rng to use.
  
  Returns:
  
  A sample.
- generateUniformVector
  
  public static double[] generateUniformVector(int length, double value)
- generateUniformVector
  
  public static float[] generateUniformVector(int length, float value)
- normalizeToDistribution
  
  public static double[] normalizeToDistribution(double[] input)
- normalizeToDistribution
  
  public static double[] normalizeToDistribution(float[] input)
- inplaceNormalizeToDistribution
  
  public static double[] inplaceNormalizeToDistribution(double[] input)
- inplaceNormalizeToDistribution
  
  public static void inplaceNormalizeToDistribution(float[] input)
- logVector
  
  public static void logVector(Logger otherLogger, Level level, double[] input)
- logVector
  
  public static void logVector(Logger otherLogger, Level level, float[] input)
- toPrimitiveDoubleFromInteger
  
  public static double[] toPrimitiveDoubleFromInteger(List<Integer> input)
- toPrimitiveDouble
  
  public static double[] toPrimitiveDouble(List<Double> input)
- toPrimitiveFloat
  
  public static float[] toPrimitiveFloat(List<Float> input)
- toPrimitiveInt
  
  public static int[] toPrimitiveInt(List<Integer> input)
- toPrimitiveLong
  
  public static long[] toPrimitiveLong(List<Long> input)
- sampleInts
  
  public static int[] sampleInts(Random rng, int size, int range)
- inPlaceAdd
  
  public static void inPlaceAdd(double[] input, double[] update)
- inPlaceSubtract
  
  public static void inPlaceSubtract(double[] input, double[] update)
- inPlaceAdd
  
  public static void inPlaceAdd(float[] input, float[] update)
- inPlaceSubtract
  
  public static void inPlaceSubtract(float[] input, float[] update)
- vectorNorm
  
  public static double vectorNorm(double[] input)
- sum
  
  public static double sum(double[] input)
- sum
  
  public static float sum(float[] input)
- sum
  
  public static double sum(double[] array, int length)
- sum
  
  public static float sum(float[] array, int length)
- sum
  
  public static float sum(int[] indices, int indicesLength, float[] input)
- sum
  
  public static float sum(int[] indices, float[] input)
- generateUniformFloatVector
  
  public static float[] generateUniformFloatVector(int length, float value)
- binarySearch
  
  public static <T> int binarySearch(List<? extends Comparable<? super T>> list, T key)
  
  A binary search function.
  
  Type Parameters:
  
  T - Type of the list, must implement Comparable.
  
  Parameters:
  
  list - Input list, must be ordered.
  
  key - Key to search for.
  
  Returns:
  
  the index of the search key, if it is contained in the list; otherwise, (-(insertion point) - 1). The insertion point is defined as the point at which the key would be inserted into the list: the index of the first element greater than the key, or list.size() if all elements in the list are less than the specified key. Note that this guarantees that the return value will be >= 0 if and only if the key is found.
- binarySearch
  
  public static <T> int binarySearch(List<? extends Comparable<? super T>> list, T key, int low, int high)
  
  A binary search function.
  
  Type Parameters:
  
  T - Type of the list, must implement Comparable.
  
  Parameters:
  
  list - Input list, must be ordered.
  
  key - Key to search for.
  
  low - Starting index.
  
  high - End index (will be searched).
  
  Returns:
  
  the index of the search key, if it is contained in the list; otherwise, (-(insertion point) - 1). The insertion point is defined as the point at which the key would be inserted into the list: the index of the first element greater than the key, or high if all elements in the list are less than the specified key. Note that this guarantees that the return value will be >= 0 if and only if the key is found.
- binarySearch
  
  public static <T> int binarySearch(List<? extends T> list, int key, ToIntFunction<T> extractionFunc)
  
  A binary search function.
  
  Type Parameters:
  
  T - Type of the list elements. Elements need not implement Comparable; they are compared using the int produced by extractionFunc.
  
  Parameters:
  
  list - Input list, must be ordered.
  
  key - Key to search for.
  
  extractionFunc - Takes a T and generates an int which can be used for comparison using int's natural ordering.
  
  Returns:
  
  the index of the search key, if it is contained in the list; otherwise, (-(insertion point) - 1). The insertion point is defined as the point at which the key would be inserted into the list: the index of the first element greater than the key, or list.size() if all elements in the list are less than the specified key. Note that this guarantees that the return value will be >= 0 if and only if the key is found.
- auc
  
  public static double auc(double[] x, double[] y)
  
  Calculates the area under the curve, bounded below by the x axis.
  Uses linear interpolation between the points on the x axis, i.e., trapezoidal integration.
  The x axis must be increasing.
  
  Parameters:
  
  x - The x points to evaluate.
  
  y - The corresponding heights.
  
  Returns:
  
  The AUC.
- meanAndVariance
  
  public static com.oracle.labs.mlrg.olcut.util.Pair<Double,Double> meanAndVariance(double[] inputs)
  
  Returns the mean and variance of the input.
  
  Parameters:
  
  inputs - The input array.
  
  Returns:
  
  The mean and variance of the inputs. The mean is the first element, the variance is the second.
- meanAndVariance
  
  public static com.oracle.labs.mlrg.olcut.util.Pair<Double,Double> meanAndVariance(double[] inputs, int length)
  
  Returns the mean and variance of the input's first length elements.
  
  Parameters:
  
  inputs - The input array.
  
  length - The number of elements to use.
  
  Returns:
  
  The mean and variance of the inputs. The mean is the first element, the variance is the second.
- weightedMean
  
  public static double weightedMean(double[] inputs, double[] weights)
  
  Returns the weighted mean of the input.
  Throws IllegalArgumentException if the two arrays are not the same length.
  
  Parameters:
  
  inputs - The input array.
  
  weights - The weights to use.
  
  Returns:
  
  The weighted mean.
- mean
  
  public static double mean(double[] inputs)
  
  Returns the mean of the input array.
  
  Parameters:
  
  inputs - The input array.
  
  Returns:
  
  The mean of inputs.
- mean
  
  public static double mean(double[] array, int length)
- mean
  
  public static <V extends Number> double mean(Collection<V> values)
- sampleVariance
  
  public static <V extends Number> double sampleVariance(Collection<V> values)
- sampleStandardDeviation
  
  public static <V extends Number> double sampleStandardDeviation(Collection<V> values)
- weightedMean
  
  public static double weightedMean(double[] array, float[] weights, int length)
- weightedSum
  
  public static double weightedSum(double[] array, float[] weights, int length)
- differencesIndices
  
  public static int[] differencesIndices(double[] input)
  
  Returns an array containing the indices where values are different. Basically a combination of np.where and np.diff.
  Stores an index if the value after it is different. Always stores the final index.
  Uses a default tolerance of 1e-12.
  
  Parameters:
  
  input - Input array.
  
  Returns:
  
  An array containing the indices where the input changes.
- differencesIndices
  
  public static int[] differencesIndices(double[] input, double tolerance)
  
  Returns an array containing the indices where values are different. Basically a combination of np.where and np.diff.
  Stores an index if the value after it is different. Always stores the final index.
  
  Parameters:
  
  input - Input array.
  
  tolerance - Tolerance to determine a difference.
  
  Returns:
  
  An array containing the indices where the input changes.
- formatDuration
  
  public static String formatDuration(long startMillis, long stopMillis)
  
  Formats a duration given two times in milliseconds.
  Format string is - (%02d:%02d:%02d:%03d) or (%d days, %02d:%02d:%02d:%03d)
  
  Parameters:
  
  startMillis - Start time in ms.
  
  stopMillis - End time in ms.
  
  Returns:
  
  A formatted string measuring time in hours, minutes, seconds and milliseconds.
- sortedDifference
  
  public static int[] sortedDifference(int[] first, int[] second)
  
  Expects sorted input arrays. Returns an array containing all the elements in first that are not in second.
  
  Parameters:
  
  first - The first sorted array.
  
  second - The second sorted array.
  
  Returns:
  
  An array containing all the elements of first that aren't in second.
- standardize
  
  public static double[] standardize(double[] input, double mean, double variance)
  
  Standardizes the input so it has zero mean and unit variance, i.e., subtracts the mean and divides by the variance.
  
  Parameters:
  
  input - The input to standardize.
  
  mean - The mean.
  
  variance - The variance.
  
  Returns:
  
  The standardized input.
- standardizeInPlace
  
  public static void standardizeInPlace(double[] input, double mean, double variance)
  
  Standardizes the input so it has zero mean and unit variance, i.e., subtracts the mean and divides by the variance. Operates in place on the input array.
  
  Parameters:
  
  input - The input to standardize.
  
  mean - The mean.
  
  variance - The variance.

Class Util

Method Summary

Methods inherited from class java.lang.Object

Method Details

argmax

argmin

toFloatArray

toDoubleArray

randperm

randperm

randpermInPlace

randpermInPlace

randpermInPlace

generateBootstrapIndices

generateBootstrapIndices

generateWeightedIndicesSample

generateWeightedIndicesSample

generateWeightedIndicesSample

generateWeightedIndicesSample

generateWeightedIndicesSampleWithoutReplacement

generateWeightedIndicesSampleWithoutReplacement

generateCDF

cumulativeSum

cumulativeSum

generateCDF

generateCDF

sampleFromCDF

sampleFromCDF

generateUniformVector

generateUniformVector

normalizeToDistribution

normalizeToDistribution

inplaceNormalizeToDistribution

inplaceNormalizeToDistribution

logVector

logVector

toPrimitiveDoubleFromInteger

toPrimitiveDouble

toPrimitiveFloat

toPrimitiveInt

toPrimitiveLong

sampleInts

inPlaceAdd

inPlaceSubtract

inPlaceAdd

inPlaceSubtract

vectorNorm

sum

sum

sum

sum

sum

sum

generateUniformFloatVector

binarySearch

binarySearch

binarySearch

auc

meanAndVariance

meanAndVariance

weightedMean

mean

mean

mean

sampleVariance

sampleStandardDeviation

weightedMean

weightedSum

differencesIndices

differencesIndices

formatDuration

sortedDifference

standardize

standardizeInPlace