001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo; 018 019import java.io.Serializable; 020import java.util.HashMap; 021import java.util.List; 022import java.util.Map; 023import java.util.TreeMap; 024 025/** 026 * ImmutableFeatureMap is used when unknown features should not be added to the FeatureMap. 027 * <p> 028 * It's also got feature ids as those are only generated for immutable maps. 029 * <p> 030 * The feature ids are generated by sorting the feature names by the String comparator. 031 * This ensures that any Example with sorted names has sorted int ids, even if some of 032 * those features are unobserved. This is an extremely important property of {@link Feature}s, 033 * {@link Example}s and {@link ImmutableFeatureMap}. 034 */ 035public class ImmutableFeatureMap extends FeatureMap implements Serializable { 036 private static final long serialVersionUID = 1L; 037 038 /** 039 * The map from id numbers to the feature infos. 040 */ 041 protected final Map<Integer,VariableIDInfo> idMap; 042 043 /** 044 * The number of features. 045 */ 046 protected int size; 047 048 /** 049 * Constructs a new immutable version which is a deep copy of the supplied feature map, generating new ID numbers. 050 * <p> 051 * The new id numbers will be the same as the old ones (if they existed) assuming this is a regular feature map. 052 * @param map The map to copy. 053 */ 054 public ImmutableFeatureMap(FeatureMap map) { 055 this(generateIDs(map)); 056 } 057 058 /** 059 * Constructs a new immutable feature map copying the supplied variable infos and generating appropriate ID numbers. 060 * @param infoList The variable infos. 061 */ 062 public ImmutableFeatureMap(List<VariableInfo> infoList) { 063 this(generateIDs(infoList)); 064 } 065 066 private ImmutableFeatureMap(Map<String,VariableIDInfo> map) { 067 super(map); 068 idMap = new HashMap<>(); 069 for (Map.Entry<String, VariableInfo> e : m.entrySet()) { 070 VariableIDInfo idInfo = (VariableIDInfo) e.getValue(); 071 idMap.put(idInfo.getID(),idInfo); 072 } 073 size = m.size(); 074 } 075 076 /** 077 * Constructs a new empty immutable feature map. 078 * <p> 079 * Used for mocking feature domains in tests. 080 */ 081 protected ImmutableFeatureMap() { 082 super(); 083 idMap = new HashMap<>(); 084 } 085 086 /** 087 * Gets the {@link VariableIDInfo} 088 * for this id number. Returns null if it's unknown. 089 * @param id The id number to lookup. 090 * @return The VariableInfo, or null. 091 */ 092 public VariableIDInfo get(int id) { 093 return idMap.get(id); 094 } 095 096 /** 097 * Gets the {@link VariableIDInfo} 098 * for this name. Returns null if it's unknown. 099 * @param name The name to lookup. 100 * @return The VariableInfo, or null. 101 */ 102 @Override 103 public VariableIDInfo get(String name) { 104 return (VariableIDInfo) super.get(name); 105 } 106 107 /** 108 * Gets the id number for this feature, returns -1 if it's unknown. 109 * @param name The name of the feature. 110 * @return A non-negative integer if the feature is known, -1 otherwise. 111 */ 112 public int getID(String name) { 113 VariableIDInfo info = get(name); 114 if (info != null) { 115 return info.getID(); 116 } else { 117 return -1; 118 } 119 } 120 121 @Override 122 public int size() { 123 return size; 124 } 125 126 /** 127 * Generates the feature ids by sorting the features with the String comparator, 128 * then sequentially numbering them. 129 * @param map A feature map to convert. 130 * @return A map from feature names to VariableIDInfo objects. 131 */ 132 public static Map<String,VariableIDInfo> generateIDs(FeatureMap map) { 133 TreeMap<String,VariableInfo> sortedMap = new TreeMap<>(map.m); 134 return generateIDs(sortedMap); 135 } 136 137 /** 138 * Generates the feature ids by sorting the features with the String comparator, 139 * then sequentially numbering them. 140 * @param list A list of {@link VariableInfo}s to generate a map from. 141 * @return A map from feature names to VariableIDInfo objects. 142 */ 143 public static Map<String,VariableIDInfo> generateIDs(List<? extends VariableInfo> list) { 144 TreeMap<String,VariableInfo> sortedMap = new TreeMap<>(); 145 for (VariableInfo m : list) { 146 sortedMap.put(m.getName(),m); 147 } 148 return generateIDs(sortedMap); 149 } 150 151 /** 152 * Generates the feature ids from a sorted Map. 153 * @param sortedMap A sorted map of the VariableInfos. 154 * @return A map from feature names to VariableIDInfo objects. 155 */ 156 private static Map<String, VariableIDInfo> generateIDs(TreeMap<String, VariableInfo> sortedMap) { 157 Map<String,VariableIDInfo> outputMap = new HashMap<>(); 158 int counter = 0; 159 for (Map.Entry<String, VariableInfo> e : sortedMap.entrySet()) { 160 VariableIDInfo newInfo = e.getValue().makeIDInfo(counter); 161 outputMap.put(e.getKey(),newInfo); 162 counter++; 163 } 164 return outputMap; 165 } 166 167}