/*
 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.tribuo;

import java.util.SplittableRandom;

/**
 * Stores information about real valued features.
 * <p>
 * Contains sufficient statistics to model the feature as a gaussian, plus the max and min values.
 * The mean and variance statistics are maintained incrementally as values are observed.
 * <p>
 * Does not contain an id number, but can be transformed into {@link RealIDInfo} which
 * does contain an id number.
 */
public class RealInfo extends SkeletalVariableInfo {
    private static final long serialVersionUID = 1L;

    /**
     * The maximum observed feature value.
     * Starts at negative infinity so that any observed value replaces it.
     */
    protected double max = Double.NEGATIVE_INFINITY;

    /**
     * The minimum observed feature value.
     * Starts at positive infinity so that any observed value replaces it.
     */
    protected double min = Double.POSITIVE_INFINITY;

    /**
     * The feature mean.
     */
    protected double mean = 0.0;

    /**
     * The running sum of squared deviations from the mean (used to compute the variance).
     */
    protected double sumSquares = 0.0;

    /**
     * Creates an empty real info with the supplied name.
     * @param name The feature name.
     */
    public RealInfo(String name) {
        super(name);
    }

    /**
     * Creates a real info with the supplied starting conditions.
     * <p>
     * All observations are assumed to be of zero.
     * @param name The feature name.
     * @param count The number of zeros observed.
     */
    public RealInfo(String name, int count) {
        super(name, count);
    }

    /**
     * Creates a real info with the supplied starting conditions.
     * @param name The feature name.
     * @param count The observation count.
     * @param max The maximum observed value.
     * @param min The minimum observed value.
     * @param mean The mean observed value.
     * @param sumSquares The sum of squared deviations from the mean (used to calculate variance online).
     */
    public RealInfo(String name, int count, double max, double min, double mean, double sumSquares) {
        super(name, count);
        this.max = max;
        this.min = min;
        this.mean = mean;
        this.sumSquares = sumSquares;
    }

    /**
     * Copy constructor.
     * @param other The info to copy.
     */
    public RealInfo(RealInfo other) {
        this(other, other.name);
    }

    /**
     * Copy constructor which renames the feature. Used to redact the feature name.
     * @param other The info to copy.
     * @param newName The new name.
     */
    protected RealInfo(RealInfo other, String newName) {
        super(newName, other.count);
        this.max = other.max;
        this.min = other.min;
        this.mean = other.mean;
        this.sumSquares = other.sumSquares;
    }

    /**
     * Records a new feature value, updating the min, max, mean and sum of squared deviations.
     * <p>
     * Values equal to zero are ignored and leave every statistic untouched.
     * @param value The observed feature value.
     */
    @Override
    protected void observe(double value) {
        if (value != 0.0) {
            // NOTE(review): super.observe is expected to update count before it is
            // used as the divisor below - confirm in SkeletalVariableInfo.
            super.observe(value);
            if (value < min) {
                min = value;
            }
            if (value > max) {
                max = value;
            }
            // Incremental mean/variance update: delta uses the old mean,
            // delta2 uses the freshly updated mean.
            double delta = value - mean;
            mean += delta / count;
            double delta2 = value - mean;
            sumSquares += delta * delta2;
        }
    }

    /**
     * Gets the minimum observed value.
     * @return The minimum value.
     */
    public double getMin() {
        return min;
    }

    /**
     * Gets the maximum observed value.
     * @return The maximum value.
     */
    public double getMax() {
        return max;
    }

    /**
     * Gets the sample mean.
     * @return The sample mean.
     */
    public double getMean() {
        return mean;
    }

    /**
     * Gets the sample variance.
     * <p>
     * Computed as the sum of squared deviations divided by {@code count - 1}, so the
     * result is not meaningful when fewer than two values have been counted.
     * @return The sample variance.
     */
    public double getVariance() {
        return sumSquares / (count - 1);
    }

    /**
     * Creates a copy of this info via the copy constructor.
     * @return A new RealInfo holding the same statistics.
     */
    @Override
    public RealInfo copy() {
        return new RealInfo(this);
    }

    /**
     * Creates a {@link RealIDInfo} from this info and the supplied id number.
     * @param id The id number.
     * @return A RealIDInfo built from this info.
     */
    @Override
    public RealIDInfo makeIDInfo(int id) {
        return new RealIDInfo(this, id);
    }

    /**
     * Creates a copy of this info with a different feature name.
     * @param newName The new name.
     * @return A new RealInfo holding the same statistics under the new name.
     */
    @Override
    public RealInfo rename(String newName) {
        return new RealInfo(this, newName);
    }

    /**
     * Samples a value uniformly between the observed minimum and maximum.
     * @param rng The source of randomness.
     * @return A value drawn from the observed range of this feature.
     */
    @Override
    public double uniformSample(SplittableRandom rng) {
        // Scale the unit-interval draw by the width of the observed range,
        // then shift it so the range starts at min.
        return (rng.nextDouble() * (max - min)) + min;
    }

    @Override
    public String toString() {
        return String.format("RealFeature(name=%s,count=%d,max=%f,min=%f,mean=%f,variance=%f)",name,count,max,min,mean,(sumSquares /(count-1)));
    }
}