001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo;
018
019import com.oracle.labs.mlrg.olcut.config.Configurable;
020import com.oracle.labs.mlrg.olcut.provenance.Provenancable;
021import com.oracle.labs.mlrg.olcut.provenance.Provenance;
022import org.tribuo.provenance.TrainerProvenance;
023
024import java.util.Collections;
025import java.util.Map;
026
027/**
028 * An interface for things that can train predictive models.
029 * @param <T> the type of the {@link Output} in the examples
030 */
031public interface Trainer<T extends Output<T>> extends Configurable, Provenancable<TrainerProvenance> {
032
033    /**
034     * Default seed used to initialise RNGs.
035     */
036    public static long DEFAULT_SEED = 12345L;
037    
038    /**
039     * Trains a predictive model using the examples in the given data set.
040     * @param examples the data set containing the examples.
041     * @return a predictive model that can be used to generate predictions for new examples.
042     */
043    default public Model<T> train(Dataset<T> examples) {
044        return train(examples, Collections.emptyMap());
045    }
046
047    /**
048     * Trains a predictive model using the examples in the given data set.
049     * @param examples the data set containing the examples.
050     * @param runProvenance Training run specific provenance (e.g., fold number).
051     * @return a predictive model that can be used to generate predictions for new examples.
052     */
053    public Model<T> train(Dataset<T> examples, Map<String, Provenance> runProvenance);
054
055    /**
056     * The number of times this trainer instance has had it's train method invoked.
057     * <p>
058     * This is used to determine how many times the trainer's RNG has been accessed
059     * to ensure replicability in the random number stream.
060     * @return The number of train invocations.
061     */
062    public int getInvocationCount();
063}