001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo; 018 019import com.oracle.labs.mlrg.olcut.config.Configurable; 020import com.oracle.labs.mlrg.olcut.provenance.Provenancable; 021import com.oracle.labs.mlrg.olcut.provenance.Provenance; 022import org.tribuo.provenance.TrainerProvenance; 023 024import java.util.Collections; 025import java.util.Map; 026 027/** 028 * An interface for things that can train predictive models. 029 * @param <T> the type of the {@link Output} in the examples 030 */ 031public interface Trainer<T extends Output<T>> extends Configurable, Provenancable<TrainerProvenance> { 032 033 /** 034 * Default seed used to initialise RNGs. 035 */ 036 public static long DEFAULT_SEED = 12345L; 037 038 /** 039 * Trains a predictive model using the examples in the given data set. 040 * @param examples the data set containing the examples. 041 * @return a predictive model that can be used to generate predictions for new examples. 042 */ 043 default public Model<T> train(Dataset<T> examples) { 044 return train(examples, Collections.emptyMap()); 045 } 046 047 /** 048 * Trains a predictive model using the examples in the given data set. 049 * @param examples the data set containing the examples. 050 * @param runProvenance Training run specific provenance (e.g., fold number). 051 * @return a predictive model that can be used to generate predictions for new examples. 052 */ 053 public Model<T> train(Dataset<T> examples, Map<String, Provenance> runProvenance); 054 055 /** 056 * The number of times this trainer instance has had it's train method invoked. 057 * <p> 058 * This is used to determine how many times the trainer's RNG has been accessed 059 * to ensure replicability in the random number stream. 060 * @return The number of train invocations. 061 */ 062 public int getInvocationCount(); 063}