001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.sequence;
018
019import org.tribuo.Example;
020import org.tribuo.Feature;
021import org.tribuo.FeatureMap;
022import org.tribuo.Output;
023import org.tribuo.OutputFactory;
024import org.tribuo.hash.HashedFeatureMap;
025import org.tribuo.impl.ArrayExample;
026import org.tribuo.impl.BinaryFeaturesExample;
027import org.tribuo.util.Merger;
028
029import java.io.Serializable;
030import java.util.ArrayList;
031import java.util.Iterator;
032import java.util.List;
033import java.util.logging.Logger;
034
035/**
036 * A sequence of examples, used for sequence classification.
037 */
038public class SequenceExample<T extends Output<T>> implements Iterable<Example<T>>, Serializable {
039    private static final long serialVersionUID = 1L;
040
041    private static final Logger logger = Logger.getLogger(SequenceExample.class.getName());
042
043    public static final float DEFAULT_WEIGHT = 1.0f;
044
045    private final List<Example<T>> examples;
046    private float weight = 1.0f;
047
048    /**
049     * Creates an empty sequence example.
050     */
051    public SequenceExample() {
052        this(new ArrayList<>());
053    }
054
055    /**
056     * Creates a sequence example from the list of examples.
057     * <p>
058     * The examples are not copied by this method.
059     * @param examples The examples to incorporate.
060     */
061    public SequenceExample(List<Example<T>> examples) {
062        this(examples,DEFAULT_WEIGHT);
063    }
064
065    /**
066     * Creates a sequence example from the list of examples, setting the weight.
067     * <p>
068     * The examples are encapsulated by this constructor, not copied.
069     * @param examples The examples to incorporate.
070     * @param weight The weight of this sequence.
071     */
072    public SequenceExample(List<Example<T>> examples, float weight) {
073        this.examples = examples;
074        this.weight = weight;
075    }
076
077    /**
078     * Creates a sequence example from the supplied outputs and list of list of features.
079     * <p>
080     * The features are copied out by this constructor. The outputs and features lists
081     * must be of the same length. Sets the weight to {@link SequenceExample#DEFAULT_WEIGHT}.
082     * @param outputs The outputs for each sequence element.
083     * @param features The features for each sequence element.
084     */
085    public SequenceExample(List<T> outputs, List<? extends List<? extends Feature>> features) {
086        this(outputs,features,DEFAULT_WEIGHT);
087    }
088
089    /**
090     * Creates a sequence example from the supplied weight, outputs and list of list of features.
091     * <p>
092     * The features are copied out by this constructor. The outputs and features lists
093     * must be of the same length.
094     * @param outputs The outputs for each sequence element.
095     * @param features The features for each sequence element.
096     * @param weight The weight for this sequence example.
097     */
098    public SequenceExample(List<T> outputs, List<? extends List<? extends Feature>> features, float weight) {
099        this(outputs, features, weight, false);
100    }
101
102    public SequenceExample(List<T> outputs, List<? extends List<? extends Feature>> features, boolean attemptBinaryFeatures) {
103        this(outputs, features, DEFAULT_WEIGHT, attemptBinaryFeatures);
104    }
105    
106    public SequenceExample(List<T> outputs, List<? extends List<? extends Feature>> features, float weight, boolean attemptBinaryFeatures) {
107        if (outputs.size() != features.size()) {
108            throw new IllegalArgumentException("outputs.size() = " + outputs.size() + ", features.size() = " + features.size());
109        }
110
111        List<Example<T>> examples = new ArrayList<>(outputs.size());
112
113        for (int i = 0; i < outputs.size(); i++) {
114            List<? extends Feature> list = features.get(i);
115            Example<T> example = null;
116            if(attemptBinaryFeatures){
117                try {
118                    example = new BinaryFeaturesExample<>(outputs.get(i), list);
119                } catch(IllegalArgumentException iae){
120                    logger.finer("attempted to create BinaryFeaturesExample but not all of the features were binary");
121                    example = new ArrayExample<>(outputs.get(i), list);
122                }
123            } else {
124                example = new ArrayExample<>(outputs.get(i), list);
125            }
126            examples.add(example);
127        }
128
129        this.examples = examples;
130        this.weight = weight;
131    }
132
133    /**
134     * Creates a deep copy of the supplied sequence example.
135     * @param other The sequence example to copy.
136     */
137    public SequenceExample(SequenceExample<T> other) {
138        this.examples = new ArrayList<>(other.size());
139        for(Example<T> example : other) {
140            examples.add(example.copy());
141        }
142        this.weight = other.weight;
143    }
144
145    /**
146     * Return how many examples are in this sequence.
147     * @return The number of examples.
148     */
149    public int size() {
150        return examples.size();
151    }
152
153    /**
154     * Removes the features in the supplied list from each example contained in this sequence.
155     * @param features The features to remove.
156     */
157    public void removeFeatures(List<Feature> features) {
158        for (Example<T> e : examples) {
159            e.removeFeatures(features);
160        }
161    }
162
163    /**
164     * Gets the example found at the specified index.
165     * @param i The index to lookup.
166     * @return The {@link Example} for index i.
167     */
168    public Example<T> get(int i) {
169        return examples.get(i);
170    }
171
172    /**
173     * Checks that each {@link Example} in this sequence is valid.
174     * @return True if each {@link Example} is valid, false otherwise.
175     */
176    public boolean validateExample() {
177        if (examples.isEmpty()) {
178            return false;
179        } else {
180            boolean valid = true;
181            for (Example<T> e : examples) {
182                valid &= e.validateExample();
183            }
184            return valid;
185        }
186    }
187
188    /**
189     * Reduces the features in each example using the supplied {@link Merger}.
190     * @param merger The merger to use in the reduction.
191     */
192    public void reduceByName(Merger merger) {
193        for (Example<T> e : examples) {
194            e.reduceByName(merger);
195        }
196    }
197
198    /**
199     * Sets the weight of this sequence.
200     * @param weight The new weight.
201     */
202    public void setWeight(float weight) {
203        this.weight = weight;
204    }
205
206    /**
207     * Gets the weight of this sequence.
208     * @return The weight of this sequence.
209     */
210    public float getWeight() {
211        return weight;
212    }
213
214    /**
215     * Adds an {@link Example} to this sequence.
216     * @param e The example to add.
217     */
218    public void addExample(Example<T> e) {
219        examples.add(e);
220    }
221
222    /**
223     * Returns a deep copy of this SequenceExample.
224     * @return A deep copy.
225     */
226    public SequenceExample<T> copy() {
227        return new SequenceExample<>(this);
228    }
229
230    @Override
231    public Iterator<Example<T>> iterator() {
232        return examples.iterator();
233    }
234
235    /**
236     * Creates an iterator over every feature in this sequence.
237     * @return An iterator over features.
238     */
239    public Iterator<Feature> featureIterator() {
240        return new FeatureIterator<>(iterator());
241    }
242
243    /**
244     * Reassigns feature name Strings in each Example inside this SequenceExample to point to
245     * those in the {@link FeatureMap}. This significantly reduces memory allocation. It is called
246     * when a SequenceExample is added to a {@link MutableSequenceDataset}, and should not be
247     * called outside of that context as it may interact unexpectedly with
248     * {@link HashedFeatureMap}.
249     * @param featureMap The feature map containing canonical feature names.
250     */
251    public void canonicalise(FeatureMap featureMap) {
252        for (Example<T> e : examples) {
253            e.canonicalize(featureMap);
254        }
255    }
256
257    /**
258     * Creates a SequenceExample using {@link OutputFactory#getUnknownOutput()} as the output for each
259     * sequence element.
260     * <p>
261     * Note: this method is used to create SequenceExamples at prediction time where there is no
262     * ground truth {@link Output}.
263     * @param features The features for each sequence element.
264     * @param outputFactory The output factory to use.
265     * @param <T> The type of the {@link Output}.
266     * @return A new SequenceExample.
267     */
268    public static <T extends Output<T>> SequenceExample<T> createWithEmptyOutputs(List<? extends List<? extends Feature>> features, OutputFactory<T> outputFactory) {
269        ArrayList<Example<T>> examples = new ArrayList<>(features.size());
270
271        for (List<? extends Feature> list : features) {
272            ArrayExample<T> example = new ArrayExample<>(outputFactory.getUnknownOutput());
273            example.addAll(list);
274            examples.add(example);
275        }
276
277        return new SequenceExample<>(examples);
278    }
279
280    private static class FeatureIterator<T extends Output<T>> implements Iterator<Feature> {
281        private final Iterator<Example<T>> itr;
282        private Iterator<Feature> featureItr;
283
284        public FeatureIterator(Iterator<Example<T>> e) {
285            itr = e;
286        }
287
288        @Override
289        public boolean hasNext() {
290            if ((featureItr != null) && (featureItr.hasNext())) {
291                return true;
292            } else if (itr.hasNext()) {
293                while (itr.hasNext()) {
294                    featureItr = itr.next().iterator();
295                    if (featureItr.hasNext()) {
296                        return true;
297                    }
298                }
299                return false;
300            } else {
301                return false;
302            }
303        }
304
305        @Override
306        public Feature next() {
307            if (featureItr != null) {
308                return featureItr.next();
309            } else {
310                return null;
311            }
312        }
313    }
314}
315