/*
 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
016
017package org.tribuo;
018
019import java.io.Serializable;
020import java.util.HashMap;
021import java.util.List;
022import java.util.Map;
023import java.util.TreeMap;
024
025/**
026 * ImmutableFeatureMap is used when unknown features should not be added to the FeatureMap.
027 * <p>
028 * It's also got feature ids as those are only generated for immutable maps.
029 * <p>
030 * The feature ids are generated by sorting the feature names by the String comparator.
031 * This ensures that any Example with sorted names has sorted int ids, even if some of
032 * those features are unobserved. This is an extremely important property of {@link Feature}s,
033 * {@link Example}s and {@link ImmutableFeatureMap}.
034 */
035public class ImmutableFeatureMap extends FeatureMap implements Serializable {
036    private static final long serialVersionUID = 1L;
037
038    /**
039     * The map from id numbers to the feature infos.
040     */
041    protected final Map<Integer,VariableIDInfo> idMap;
042
043    /**
044     * The number of features.
045     */
046    protected int size;
047
048    /**
049     * Constructs a new immutable version which is a deep copy of the supplied feature map, generating new ID numbers.
050     * <p>
051     * The new id numbers will be the same as the old ones (if they existed) assuming this is a regular feature map.
052     * @param map The map to copy.
053     */
054    public ImmutableFeatureMap(FeatureMap map) {
055        this(generateIDs(map));
056    }
057
058    /**
059     * Constructs a new immutable feature map copying the supplied variable infos and generating appropriate ID numbers.
060     * @param infoList The variable infos.
061     */
062    public ImmutableFeatureMap(List<VariableInfo> infoList) {
063        this(generateIDs(infoList));
064    }
065
066    private ImmutableFeatureMap(Map<String,VariableIDInfo> map) {
067        super(map);
068        idMap = new HashMap<>();
069        for (Map.Entry<String, VariableInfo> e : m.entrySet()) {
070            VariableIDInfo idInfo = (VariableIDInfo) e.getValue();
071            idMap.put(idInfo.getID(),idInfo);
072        }
073        size = m.size();
074    }
075
076    /**
077     * Constructs a new empty immutable feature map.
078     * <p>
079     * Used for mocking feature domains in tests.
080     */
081    protected ImmutableFeatureMap() {
082        super();
083        idMap = new HashMap<>();
084    }
085
086    /**
087     * Gets the {@link VariableIDInfo}
088     * for this id number. Returns null if it's unknown.
089     * @param id The id number to lookup.
090     * @return The VariableInfo, or null.
091     */
092    public VariableIDInfo get(int id) {
093        return idMap.get(id);
094    }
095
096    /**
097     * Gets the {@link VariableIDInfo}
098     * for this name. Returns null if it's unknown.
099     * @param name The name to lookup.
100     * @return The VariableInfo, or null.
101     */
102    @Override
103    public VariableIDInfo get(String name) {
104        return (VariableIDInfo) super.get(name);
105    }
106
107    /**
108     * Gets the id number for this feature, returns -1 if it's unknown.
109     * @param name The name of the feature.
110     * @return A non-negative integer if the feature is known, -1 otherwise.
111     */
112    public int getID(String name) {
113        VariableIDInfo info = get(name);
114        if (info != null) {
115            return info.getID();
116        } else {
117            return -1;
118        }
119    }
120
121    @Override
122    public int size() {
123        return size;
124    }
125
126    /**
127     * Generates the feature ids by sorting the features with the String comparator,
128     * then sequentially numbering them.
129     * @param map A feature map to convert.
130     * @return A map from feature names to VariableIDInfo objects.
131     */
132    public static Map<String,VariableIDInfo> generateIDs(FeatureMap map) {
133        TreeMap<String,VariableInfo> sortedMap = new TreeMap<>(map.m);
134        return generateIDs(sortedMap);
135    }
136
137    /**
138     * Generates the feature ids by sorting the features with the String comparator,
139     * then sequentially numbering them.
140     * @param list A list of {@link VariableInfo}s to generate a map from.
141     * @return A map from feature names to VariableIDInfo objects.
142     */
143    public static Map<String,VariableIDInfo> generateIDs(List<? extends VariableInfo> list) {
144        TreeMap<String,VariableInfo> sortedMap = new TreeMap<>();
145        for (VariableInfo m : list) {
146            sortedMap.put(m.getName(),m);
147        }
148        return generateIDs(sortedMap);
149    }
150
151    /**
152     * Generates the feature ids from a sorted Map.
153     * @param sortedMap A sorted map of the VariableInfos.
154     * @return A map from feature names to VariableIDInfo objects.
155     */
156    private static Map<String, VariableIDInfo> generateIDs(TreeMap<String, VariableInfo> sortedMap) {
157        Map<String,VariableIDInfo> outputMap = new HashMap<>();
158        int counter = 0;
159        for (Map.Entry<String, VariableInfo> e : sortedMap.entrySet()) {
160            VariableIDInfo newInfo = e.getValue().makeIDInfo(counter);
161            outputMap.put(e.getKey(),newInfo);
162            counter++;
163        }
164        return outputMap;
165    }
166
167}