001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo;
018
019import org.tribuo.hash.HashedFeatureMap;
020import org.tribuo.transform.Transformer;
021import org.tribuo.transform.TransformerMap;
022import org.tribuo.util.Merger;
023
024import java.io.Serializable;
025import java.util.ArrayList;
026import java.util.Collection;
027import java.util.Collections;
028import java.util.HashMap;
029import java.util.List;
030import java.util.Map;
031import java.util.Optional;
032
033/**
034 * An example used for training and evaluation. Examples have a true output
035 * associated with them or an instance from {@link OutputFactory#getUnknownOutput()}
036 * and a list of features that can be used for prediction.
037 * <p>
038 * An example is a sorted set of features, sorted by the String comparator on the feature
039 * name.
040 * <p>
041 * Examples have metadata associated with them, stored as a map from a String key, to
042 * an Object value. This metadata is append only for any given example, and the metadata
043 * values should be immutable (as they will be referenced rather than copied when an
044 * example is copied).
045 * @param <T> The type of output that this example contains.
046 */
047public abstract class Example<T extends Output<T>> implements Iterable<Feature>, Serializable {
048    private static final long serialVersionUID = 1L;
049
050    /**
051     * The default initial size of the metadata map.
052     */
053    protected static final int DEFAULT_METADATA_SIZE = 2;
054
055    /**
056     * The default weight.
057     */
058    public static final float DEFAULT_WEIGHT = 1.0f;
059
060    /**
061     * By convention the example name is stored using this metadata key.
062     * <p>
063     * Note: not all examples are named.
064     */
065    public static final String NAME = "name";
066
067    /**
068     * The output associated with this example.
069     */
070    protected final T output;
071
072    /**
073     * The weight associated with this example.
074     */
075    protected float weight = DEFAULT_WEIGHT;
076
077    /**
078     * The example metadata.
079     */
080    protected Map<String,Object> metadata = null;
081
082    /**
083     * Construct an empty example using the supplied output, weight and metadata.
084     * @param output The output.
085     * @param weight The weight.
086     * @param metadata The metadata.
087     */
088    protected Example(T output, float weight, Map<String,Object> metadata) {
089        this.output = output;
090        this.weight = weight;
091        if (metadata != null && !metadata.isEmpty()) {
092            this.metadata = new HashMap<>(metadata);
093        } else {
094            this.metadata = null;
095        }
096    }
097
098    /**
099     * Construct an empty example using the supplied output and weight.
100     * @param output The output.
101     * @param weight The weight.
102     */
103    protected Example(T output, float weight) {
104        this.output = output;
105        this.weight = weight;
106    }
107
108    /**
109     * Construct an empty example using the supplied output, metadata and
110     * {@link Example#DEFAULT_WEIGHT} as the weight.
111     * @param output The output.
112     * @param metadata The metadata.
113     */
114    protected Example(T output, Map<String,Object> metadata) {
115        this.output = output;
116        if (metadata != null && !metadata.isEmpty()) {
117            this.metadata = new HashMap<>(metadata);
118        } else {
119            this.metadata = null;
120        }
121    }
122
123    /**
124     * Construct an empty example using the supplied output and
125     * {@link Example#DEFAULT_WEIGHT} as the weight.
126     * @param output The output.
127     */
128    protected Example(T output) {
129        this.output = output;
130    }
131
132    /**
133     * Copies the output, weight and metadata into this example.
134     * @param other The example to copy.
135     */
136    protected Example(Example<T> other) {
137        this.output = other.output;
138        this.weight = other.weight;
139        if (other.metadata != null && !other.metadata.isEmpty()) {
140            this.metadata = new HashMap<>(other.metadata);
141        } else {
142            this.metadata = null;
143        }
144    }
145
146    /**
147     * Gets the example's {@link Output}.
148     * @return The example's output.
149     */
150    public T getOutput() {
151        return output;
152    }
153
154    /**
155     * Gets the example's weight.
156     * @return The example's weight.
157     */
158    public float getWeight() {
159        return weight;
160    }
161
162    /**
163     * Sets the example's weight.
164     * @param weight The new weight.
165     */
166    public void setWeight(float weight) {
167        this.weight = weight;
168    }
169
170    /**
171     * Gets the associated metadata value for this key, if it exists.
172     * Otherwise return {@link Optional#empty()}.
173     * @param key The key to check.
174     * @return The value if present.
175     */
176    public synchronized Optional<Object> getMetadataValue(String key) {
177        if (metadata != null) {
178            return Optional.ofNullable(metadata.get(key));
179        } else {
180            return Optional.empty();
181        }
182    }
183
184    /**
185     * Puts the specified key, value pair into the metadata.
186     * <p>
187     * Example metadata is append only, and so this method
188     * throws {@link IllegalArgumentException} if the key is already present.
189     * @param key The key.
190     * @param value The value.
191     */
192    public synchronized void setMetadataValue(String key, Object value) {
193        if (containsMetadata(key)) {
194            Object oldValue = metadata.get(key);
195            throw new IllegalArgumentException("Example metadata is append only. Key '" + key + "' is already associated with value '" + oldValue + "'");
196        }
197        if (metadata == null) {
198            metadata = new HashMap<>(DEFAULT_METADATA_SIZE);
199        }
200        metadata.put(key,value);
201    }
202
203    /**
204     * Test if the metadata contains the supplied key.
205     * @param key The key to test.
206     * @return True if the metadata contains a value for the supplied key.
207     */
208    public boolean containsMetadata(String key) {
209        if (metadata != null) {
210            return metadata.containsKey(key);
211        } else {
212            return false;
213        }
214    }
215
216    /**
217     * Returns a copy of this example's metadata.
218     * @return The metadata.
219     */
220    public Map<String,Object> getMetadata() {
221        if (metadata != null) {
222            return new HashMap<>(metadata);
223        } else {
224            return Collections.emptyMap();
225        }
226    }
227
228    /**
229     * Sorts the example by the string comparator.
230     */
231    protected abstract void sort();
232
233    /**
234     * Adds a feature. This maintains the sorted invariant and has a lg(example.size())
235     * cost per insertion.
236     * @param feature The feature to add.
237     */
238    public abstract void add(Feature feature);
239
240    /**
241     * Adds a collection of features. This maintains the sorted invariant but is
242     * more efficient than adding one at a time due to allocation.
243     * @param features The features to add.
244     */
245    public abstract void addAll(Collection<? extends Feature> features);
246
247    /**
248     * Return how many features are in this example.
249     * @return The number of features.
250     */
251    public abstract int size();
252
253    /**
254     * Removes all features in this list from the Example.
255     * @param featureList Features to remove from this Example.
256     */
257    public abstract void removeFeatures(List<Feature> featureList);
258
259    /**
260     * Merges features with the same name using the
261     * supplied {@link Merger}.
262     * @param merger A function to merge two doubles.
263     */
264    public abstract void reduceByName(Merger merger);
265
266    /**
267     * Checks the example to see if all the feature names are unique,
268     * the feature values are not NaN, and there is at least one feature.
269     * @return true if the example is valid.
270     */
271    public abstract boolean validateExample();
272
273    /**
274     * Transforms this example by applying the transformations from the supplied {@link TransformerMap}.
275     * <p>
276     * Can be overridden for performance reasons.
277     * @param transformerMap The transformations to apply.
278     */
279    public void transform(TransformerMap transformerMap) {
280        for (Map.Entry<String,List<Transformer>> e : transformerMap.entrySet()) {
281            Feature f = lookup(e.getKey());
282            if (f != null) {
283                double value = f.getValue();
284                for (Transformer t : e.getValue()) {
285                    value = t.transform(value);
286                }
287                set(new Feature(f.getName(),value));
288            }
289        }
290    }
291
292    /**
293     * Converts all implicit zeros into explicit zeros based on the supplied feature map.
294     * @param fMap The feature map to use for densification.
295     */
296    protected void densify(FeatureMap fMap) {
297        // Densify! - guitar solo
298        List<String> featureNames = new ArrayList<>(fMap.keySet());
299        Collections.sort(featureNames);
300        densify(featureNames);
301    }
302
303    /**
304     * Converts all implicit zeros into explicit zeros based on the supplied feature names.
305     * @param featureNames A *sorted* list of feature names.
306     */
307    protected abstract void densify(List<String> featureNames);
308
309    /**
310     * Returns a deep copy of this Example.
311     * @return A deep copy of this example.
312     */
313    public abstract Example<T> copy();
314
315    /**
316     * Returns the Feature in this Example which has the supplied name, if it's present.
317     * @param i The feature name to lookup.
318     * @return The feature object.
319     */
320    public Feature lookup(String i) {
321        for (Feature f : this) {
322            if (i.equals(f.getName())) {
323                return f;
324            }
325        }
326        return null;
327    }
328
329    /**
330     * Overwrites the feature with the matching name.
331     * <p>
332     * Throws {@link IllegalArgumentException} if there isn't a feature with that
333     * name in this example.
334     * @param feature The new feature value.
335     */
336    public abstract void set(Feature feature);
337
338    /**
339     * Reassigns feature name Strings in the Example to point to those in the {@link FeatureMap}.
340     * This significantly reduces memory allocation. It is called when an Example is added
341     * to a {@link MutableDataset}, and should not be called outside of that context as it may interact
342     * unexpectedly with {@link HashedFeatureMap}.
343     * @param featureMap The feature map containing canonical feature names.
344     */
345    public abstract void canonicalize(FeatureMap featureMap);
346}