001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo;
018
019import java.util.SplittableRandom;
020
021/**
022 * Stores information about real valued features.
023 * <p>
024 * Contains sufficient statistics to model the feature as a gaussian, plus the max and min values.
025 * <p>
026 * Does not contain an id number, but can be transformed into {@link RealIDInfo} which
027 * does contain an id number.
028 */
029public class RealInfo extends SkeletalVariableInfo {
030    private static final long serialVersionUID = 1L;
031
032    /**
033     * The maximum observed feature value.
034     */
035    protected double max = Double.NEGATIVE_INFINITY;
036
037    /**
038     * The minimum observed feature value.
039     */
040    protected double min = Double.POSITIVE_INFINITY;
041
042    /**
043     * The feature mean.
044     */
045    protected double mean = 0.0;
046
047    /**
048     * The sum of the squared feature values (used to compute the variance).
049     */
050    protected double sumSquares = 0.0;
051
052    /**
053     * Creates an empty real info with the supplied name.
054     * @param name The feature name.
055     */
056    public RealInfo(String name) {
057        super(name);
058    }
059
060    /**
061     * Creates a real info with the supplied starting conditions.
062     * <p>
063     * All observations are assumed to be of zero.
064     * @param name The feature name.
065     * @param count The number of zeros observed.
066     */
067    public RealInfo(String name, int count) {
068        super(name, count);
069    }
070
071    /**
072     * Creates a real info with the supplied starting conditions.
073     * @param name The feature name.
074     * @param count The observation count.
075     * @param max The maximum observed value.
076     * @param min The minimum observed value.
077     * @param mean The mean observed value.
078     * @param sumSquares The sum of the squared values (used to calculate variance online).
079     */
080    public RealInfo(String name, int count, double max, double min, double mean, double sumSquares) {
081        super(name, count);
082        this.max = max;
083        this.min = min;
084        this.mean = mean;
085        this.sumSquares = sumSquares;
086    }
087
088    /**
089     * Copy constructor.
090     * @param other The info to copy.
091     */
092    public RealInfo(RealInfo other) {
093        this(other,other.name);
094    }
095
096    /**
097     * Copy constructor which renames the feature. Used to redact the feature name.
098     * @param other The info to copy.
099     * @param newName The new name.
100     */
101    protected RealInfo(RealInfo other, String newName) {
102        super(newName,other.count);
103        this.max = other.max;
104        this.min = other.min;
105        this.mean = other.mean;
106        this.sumSquares = other.sumSquares;
107    }
108
109    @Override
110    protected void observe(double value) {
111        if (value != 0.0) {
112            super.observe(value);
113            if (value < min) {
114                min = value;
115            }
116            if (value > max) {
117                max = value;
118            }
119            double delta = value - mean;
120            mean += delta / count;
121            double delta2 = value - mean;
122            sumSquares += delta * delta2;
123        }
124    }
125
126    /**
127     * Gets the minimum observed value.
128     * @return The minimum value.
129     */
130    public double getMin() {
131        return min;
132    }
133
134    /**
135     * Gets the maximum observed value.
136     * @return The maximum value.
137     */
138    public double getMax() {
139        return max;
140    }
141
142    /**
143     * Gets the sample mean.
144     * @return The sample mean.
145     */
146    public double getMean() {
147        return mean;
148    }
149
150    /**
151     * Gets the sample variance.
152     * @return The sample variance.
153     */
154    public double getVariance() {
155        return sumSquares / (count-1);
156    }
157
158    @Override
159    public RealInfo copy() {
160        return new RealInfo(this);
161    }
162
163    @Override
164    public RealIDInfo makeIDInfo(int id) {
165        return new RealIDInfo(this,id);
166    }
167
168    @Override
169    public RealInfo rename(String newName) {
170        return new RealInfo(this,newName);
171    }
172
173    @Override
174    public double uniformSample(SplittableRandom rng) {
175        return (rng.nextDouble()*max) - min;
176    }
177
178    @Override
179    public String toString() {
180        return String.format("RealFeature(name=%s,count=%d,max=%f,min=%f,mean=%f,variance=%f)",name,count,max,min,mean,(sumSquares /(count-1)));
181    }
182}