/*
 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.tribuo.regression.liblinear;

import com.oracle.labs.mlrg.olcut.util.Pair;
import org.tribuo.Example;
import org.tribuo.Excuse;
import org.tribuo.Feature;
import org.tribuo.ImmutableFeatureMap;
import org.tribuo.ImmutableOutputInfo;
import org.tribuo.Model;
import org.tribuo.Prediction;
import org.tribuo.common.liblinear.LibLinearModel;
import org.tribuo.common.liblinear.LibLinearTrainer;
import org.tribuo.provenance.ModelProvenance;
import org.tribuo.regression.Regressor;
import de.bwaldvogel.liblinear.FeatureNode;
import de.bwaldvogel.liblinear.Linear;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.logging.Logger;

/**
 * A {@link Model} which wraps a LibLinear-java model.
 * <p>
 * It disables the LibLinear debug output as it's very chatty.
 * <p>
 * It contains an independent liblinear model for each regression dimension.
 * <p>
 * See:
 * <pre>
 * Fan RE, Chang KW, Hsieh CJ, Wang XR, Lin CJ.
 * "LIBLINEAR: A library for Large Linear Classification"
 * Journal of Machine Learning Research, 2008.
 * </pre>
 * and for the original algorithm:
 * <pre>
 * Cortes C, Vapnik V.
 * "Support-Vector Networks"
 * Machine Learning, 1995.
 * </pre>
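 * <p>
 * A minimal usage sketch via the companion {@code LibLinearRegressionTrainer};
 * {@code trainData} and {@code testExample} stand in for a pre-built regression
 * dataset and example:
 * <pre>{@code
 * LibLinearRegressionTrainer trainer = new LibLinearRegressionTrainer();
 * Model<Regressor> model = trainer.train(trainData);
 * Prediction<Regressor> prediction = model.predict(testExample);
 * Regressor output = prediction.getOutput();
 * }</pre>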
 */
public class LibLinearRegressionModel extends LibLinearModel<Regressor> {
    private static final long serialVersionUID = 2L;

    private static final Logger logger = Logger.getLogger(LibLinearRegressionModel.class.getName());

    private final String[] dimensionNames;

    LibLinearRegressionModel(String name, ModelProvenance description, ImmutableFeatureMap featureIDMap, ImmutableOutputInfo<Regressor> outputInfo, List<de.bwaldvogel.liblinear.Model> models) {
        super(name, description, featureIDMap, outputInfo, false, models);
        this.dimensionNames = Regressor.extractNames(outputInfo);
    }

    @Override
    public Prediction<Regressor> predict(Example<Regressor> example) {
        FeatureNode[] features = LibLinearTrainer.exampleToNodes(example, featureIDMap, null);
        // The bias feature is always set, so a length of 1 means the example contained no valid features.
        if (features.length == 1) {
            throw new IllegalArgumentException("No features found in Example " + example.toString());
        }

        double[] scores = new double[models.get(0).getNrClass()];
        double[] regressedValues = new double[models.size()];

        // Each regression dimension has its own independent liblinear model.
        for (int i = 0; i < regressedValues.length; i++) {
            regressedValues[i] = Linear.predictValues(models.get(i), features, scores);
        }

        Regressor regressor = new Regressor(dimensionNames, regressedValues);
        // features.length - 1 discounts the bias feature when reporting the number of features used.
        return new Prediction<>(regressor, features.length - 1, example);
    }

    @Override
    public Map<String, List<Pair<String, Double>>> getTopFeatures(int n) {
        int maxFeatures = n < 0 ? featureIDMap.size() : n;
        double[][] featureWeights = getFeatureWeights();

        Comparator<Pair<String, Double>> comparator = Comparator.comparingDouble(p -> Math.abs(p.getB()));
        Map<String, List<Pair<String, Double>>> map = new HashMap<>();
        PriorityQueue<Pair<String, Double>> q = new PriorityQueue<>(maxFeatures, comparator);

        for (int i = 0; i < models.size(); i++) {
            int numFeatures = models.get(i).getNrFeature();
            for (int j = 0; j < numFeatures; j++) {
                Pair<String, Double> cur = new Pair<>(featureIDMap.get(j).getName(), featureWeights[i][j]);
                // Keep the maxFeatures pairs with the largest absolute weights,
                // evicting the smallest element when the queue is full.
                if (q.size() < maxFeatures) {
                    q.offer(cur);
                } else if (comparator.compare(cur, q.peek()) > 0) {
                    q.poll();
                    q.offer(cur);
                }
            }
            // Drain the queue (which also empties it for the next dimension) and
            // reverse to get descending order of absolute weight.
            List<Pair<String, Double>> list = new ArrayList<>();
            while (q.size() > 0) {
                list.add(q.poll());
            }
            Collections.reverse(list);
            map.put(dimensionNames[i], list);
        }

        return map;
    }

    @Override
    protected LibLinearRegressionModel copy(String newName, ModelProvenance newProvenance) {
        List<de.bwaldvogel.liblinear.Model> newModels = new ArrayList<>();
        for (de.bwaldvogel.liblinear.Model m : models) {
            newModels.add(copyModel(m));
        }
        return new LibLinearRegressionModel(newName, newProvenance, featureIDMap, outputIDInfo, newModels);
    }

    @Override
    protected double[][] getFeatureWeights() {
        double[][] featureWeights = new double[models.size()][];

        for (int i = 0; i < models.size(); i++) {
            featureWeights[i] = models.get(i).getFeatureWeights();
        }

        return featureWeights;
    }

    /**
     * The call to model.getFeatureWeights in the public methods copies the
     * weights array, so this inner method exists to avoid repeating that copy
     * in getExcuses.
     * <p>
     * If it becomes a problem then we could cache the feature weights in the
     * model.
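     * <p>
     * A sketch of the intended call pattern (the parent class's actual
     * implementation may differ; {@code examples} and {@code excuses} are
     * assumed locals in the caller):
     * <pre>{@code
     * double[][] weights = getFeatureWeights(); // copies each model's weights once
     * for (Example<Regressor> ex : examples) {
     *     excuses.add(innerGetExcuse(ex, weights)); // reuses the copy
     * }
     * }</pre>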
     * @param e The example.
     * @param allFeatureWeights The feature weights.
     * @return An excuse for this example.
     */
    @Override
    protected Excuse<Regressor> innerGetExcuse(Example<Regressor> e, double[][] allFeatureWeights) {
        Prediction<Regressor> prediction = predict(e);
        Map<String, List<Pair<String, Double>>> weightMap = new HashMap<>();
        for (int i = 0; i < allFeatureWeights.length; i++) {
            List<Pair<String, Double>> scores = new ArrayList<>();
            for (Feature f : e) {
                int id = featureIDMap.getID(f.getName());
                if (id > -1) {
                    // A feature's contribution is its weight in this dimension times its value.
                    double score = allFeatureWeights[i][id] * f.getValue();
                    scores.add(new Pair<>(f.getName(), score));
                }
            }
            // Sort the contributions in descending order.
            scores.sort((o1, o2) -> o2.getB().compareTo(o1.getB()));
            weightMap.put(dimensionNames[i], scores);
        }

        return new Excuse<>(e, prediction, weightMap);
    }
}