001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo; 018 019import org.tribuo.hash.HashedFeatureMap; 020import org.tribuo.transform.Transformer; 021import org.tribuo.transform.TransformerMap; 022import org.tribuo.util.Merger; 023 024import java.io.Serializable; 025import java.util.ArrayList; 026import java.util.Collection; 027import java.util.Collections; 028import java.util.HashMap; 029import java.util.List; 030import java.util.Map; 031import java.util.Optional; 032 033/** 034 * An example used for training and evaluation. Examples have a true output 035 * associated with them or an instance from {@link OutputFactory#getUnknownOutput()} 036 * and a list of features that can be used for prediction. 037 * <p> 038 * An example is a sorted set of features, sorted by the String comparator on the feature 039 * name. 040 * <p> 041 * Examples have metadata associated with them, stored as a map from a String key, to 042 * an Object value. This metadata is append only for any given example, and the metadata 043 * values should be immutable (as they will be referenced rather than copied when an 044 * example is copied). 045 * @param <T> The type of output that this example contains. 046 */ 047public abstract class Example<T extends Output<T>> implements Iterable<Feature>, Serializable { 048 private static final long serialVersionUID = 1L; 049 050 /** 051 * The default initial size of the metadata map. 052 */ 053 protected static final int DEFAULT_METADATA_SIZE = 2; 054 055 /** 056 * The default weight. 057 */ 058 public static final float DEFAULT_WEIGHT = 1.0f; 059 060 /** 061 * By convention the example name is stored using this metadata key. 062 * <p> 063 * Note: not all examples are named. 064 */ 065 public static final String NAME = "name"; 066 067 /** 068 * The output associated with this example. 069 */ 070 protected final T output; 071 072 /** 073 * The weight associated with this example. 074 */ 075 protected float weight = DEFAULT_WEIGHT; 076 077 /** 078 * The example metadata. 079 */ 080 protected Map<String,Object> metadata = null; 081 082 /** 083 * Construct an empty example using the supplied output, weight and metadata. 084 * @param output The output. 085 * @param weight The weight. 086 * @param metadata The metadata. 087 */ 088 protected Example(T output, float weight, Map<String,Object> metadata) { 089 this.output = output; 090 this.weight = weight; 091 if (metadata != null && !metadata.isEmpty()) { 092 this.metadata = new HashMap<>(metadata); 093 } else { 094 this.metadata = null; 095 } 096 } 097 098 /** 099 * Construct an empty example using the supplied output and weight. 100 * @param output The output. 101 * @param weight The weight. 102 */ 103 protected Example(T output, float weight) { 104 this.output = output; 105 this.weight = weight; 106 } 107 108 /** 109 * Construct an empty example using the supplied output, metadata and 110 * {@link Example#DEFAULT_WEIGHT} as the weight. 111 * @param output The output. 112 * @param metadata The metadata. 113 */ 114 protected Example(T output, Map<String,Object> metadata) { 115 this.output = output; 116 if (metadata != null && !metadata.isEmpty()) { 117 this.metadata = new HashMap<>(metadata); 118 } else { 119 this.metadata = null; 120 } 121 } 122 123 /** 124 * Construct an empty example using the supplied output and 125 * {@link Example#DEFAULT_WEIGHT} as the weight. 126 * @param output The output. 127 */ 128 protected Example(T output) { 129 this.output = output; 130 } 131 132 /** 133 * Copies the output, weight and metadata into this example. 134 * @param other The example to copy. 135 */ 136 protected Example(Example<T> other) { 137 this.output = other.output; 138 this.weight = other.weight; 139 if (other.metadata != null && !other.metadata.isEmpty()) { 140 this.metadata = new HashMap<>(other.metadata); 141 } else { 142 this.metadata = null; 143 } 144 } 145 146 /** 147 * Gets the example's {@link Output}. 148 * @return The example's output. 149 */ 150 public T getOutput() { 151 return output; 152 } 153 154 /** 155 * Gets the example's weight. 156 * @return The example's weight. 157 */ 158 public float getWeight() { 159 return weight; 160 } 161 162 /** 163 * Sets the example's weight. 164 * @param weight The new weight. 165 */ 166 public void setWeight(float weight) { 167 this.weight = weight; 168 } 169 170 /** 171 * Gets the associated metadata value for this key, if it exists. 172 * Otherwise return {@link Optional#empty()}. 173 * @param key The key to check. 174 * @return The value if present. 175 */ 176 public synchronized Optional<Object> getMetadataValue(String key) { 177 if (metadata != null) { 178 return Optional.ofNullable(metadata.get(key)); 179 } else { 180 return Optional.empty(); 181 } 182 } 183 184 /** 185 * Puts the specified key, value pair into the metadata. 186 * <p> 187 * Example metadata is append only, and so this method 188 * throws {@link IllegalArgumentException} if the key is already present. 189 * @param key The key. 190 * @param value The value. 191 */ 192 public synchronized void setMetadataValue(String key, Object value) { 193 if (containsMetadata(key)) { 194 Object oldValue = metadata.get(key); 195 throw new IllegalArgumentException("Example metadata is append only. Key '" + key + "' is already associated with value '" + oldValue + "'"); 196 } 197 if (metadata == null) { 198 metadata = new HashMap<>(DEFAULT_METADATA_SIZE); 199 } 200 metadata.put(key,value); 201 } 202 203 /** 204 * Test if the metadata contains the supplied key. 205 * @param key The key to test. 206 * @return True if the metadata contains a value for the supplied key. 207 */ 208 public boolean containsMetadata(String key) { 209 if (metadata != null) { 210 return metadata.containsKey(key); 211 } else { 212 return false; 213 } 214 } 215 216 /** 217 * Returns a copy of this example's metadata. 218 * @return The metadata. 219 */ 220 public Map<String,Object> getMetadata() { 221 if (metadata != null) { 222 return new HashMap<>(metadata); 223 } else { 224 return Collections.emptyMap(); 225 } 226 } 227 228 /** 229 * Sorts the example by the string comparator. 230 */ 231 protected abstract void sort(); 232 233 /** 234 * Adds a feature. This maintains the sorted invariant and has a lg(example.size()) 235 * cost per insertion. 236 * @param feature The feature to add. 237 */ 238 public abstract void add(Feature feature); 239 240 /** 241 * Adds a collection of features. This maintains the sorted invariant but is 242 * more efficient than adding one at a time due to allocation. 243 * @param features The features to add. 244 */ 245 public abstract void addAll(Collection<? extends Feature> features); 246 247 /** 248 * Return how many features are in this example. 249 * @return The number of features. 250 */ 251 public abstract int size(); 252 253 /** 254 * Removes all features in this list from the Example. 255 * @param featureList Features to remove from this Example. 256 */ 257 public abstract void removeFeatures(List<Feature> featureList); 258 259 /** 260 * Merges features with the same name using the 261 * supplied {@link Merger}. 262 * @param merger A function to merge two doubles. 263 */ 264 public abstract void reduceByName(Merger merger); 265 266 /** 267 * Checks the example to see if all the feature names are unique, 268 * the feature values are not NaN, and there is at least one feature. 269 * @return true if the example is valid. 270 */ 271 public abstract boolean validateExample(); 272 273 /** 274 * Transforms this example by applying the transformations from the supplied {@link TransformerMap}. 275 * <p> 276 * Can be overridden for performance reasons. 277 * @param transformerMap The transformations to apply. 278 */ 279 public void transform(TransformerMap transformerMap) { 280 for (Map.Entry<String,List<Transformer>> e : transformerMap.entrySet()) { 281 Feature f = lookup(e.getKey()); 282 if (f != null) { 283 double value = f.getValue(); 284 for (Transformer t : e.getValue()) { 285 value = t.transform(value); 286 } 287 set(new Feature(f.getName(),value)); 288 } 289 } 290 } 291 292 /** 293 * Converts all implicit zeros into explicit zeros based on the supplied feature map. 294 * @param fMap The feature map to use for densification. 295 */ 296 protected void densify(FeatureMap fMap) { 297 // Densify! - guitar solo 298 List<String> featureNames = new ArrayList<>(fMap.keySet()); 299 Collections.sort(featureNames); 300 densify(featureNames); 301 } 302 303 /** 304 * Converts all implicit zeros into explicit zeros based on the supplied feature names. 305 * @param featureNames A *sorted* list of feature names. 306 */ 307 protected abstract void densify(List<String> featureNames); 308 309 /** 310 * Returns a deep copy of this Example. 311 * @return A deep copy of this example. 312 */ 313 public abstract Example<T> copy(); 314 315 /** 316 * Returns the Feature in this Example which has the supplied name, if it's present. 317 * @param i The feature name to lookup. 318 * @return The feature object. 319 */ 320 public Feature lookup(String i) { 321 for (Feature f : this) { 322 if (i.equals(f.getName())) { 323 return f; 324 } 325 } 326 return null; 327 } 328 329 /** 330 * Overwrites the feature with the matching name. 331 * <p> 332 * Throws {@link IllegalArgumentException} if there isn't a feature with that 333 * name in this example. 334 * @param feature The new feature value. 335 */ 336 public abstract void set(Feature feature); 337 338 /** 339 * Reassigns feature name Strings in the Example to point to those in the {@link FeatureMap}. 340 * This significantly reduces memory allocation. It is called when an Example is added 341 * to a {@link MutableDataset}, and should not be called outside of that context as it may interact 342 * unexpectedly with {@link HashedFeatureMap}. 343 * @param featureMap The feature map containing canonical feature names. 344 */ 345 public abstract void canonicalize(FeatureMap featureMap); 346}