001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo; 018 019/** 020 * A feature map that can record new feature value observations. 021 */ 022public class MutableFeatureMap extends FeatureMap { 023 private static final long serialVersionUID = 2L; 024 025 private final boolean convertHighCardinality; 026 027 /** 028 * Creates an empty feature map which converts high cardinality categorical variable infos into reals. 029 * <p> 030 * The conversion threshold is {@link CategoricalInfo#THRESHOLD}. 031 */ 032 public MutableFeatureMap() { 033 this(true); 034 } 035 036 /** 037 * Creates an empty feature map which can optionally convert high cardinality categorical variable infos into reals. 038 * <p> 039 * The conversion threshold is {@link CategoricalInfo#THRESHOLD}. 040 * @param convertHighCardinality Should this feature map convert high cardinality categorical variables into real variables? 041 */ 042 public MutableFeatureMap(boolean convertHighCardinality) { 043 super(); 044 this.convertHighCardinality = convertHighCardinality; 045 } 046 047 /** 048 * Adds a variable info into the feature map. 049 * <p> 050 * Returns the old one if there was a name collision, otherwise returns null. 051 * @param info The info to add. 052 * @return The old variable info or null. 053 */ 054 public VariableInfo put(VariableInfo info) { 055 VariableInfo old = m.put(info.getName(), info); 056 return old; 057 } 058 059 /** 060 * Adds an occurrence of a feature with a given name. 061 * 062 * @param name the name of the feature. 063 * @param value the observed value of that feature. 064 */ 065 public void add(String name, double value) { 066 SkeletalVariableInfo info = (SkeletalVariableInfo) m.computeIfAbsent(name, CategoricalInfo::new); 067 info.observe(value); 068 069 // If there are too many categories, convert into a real info and drop the old categorical info. 070 if (convertHighCardinality && info instanceof CategoricalInfo) { 071 CategoricalInfo cInfo = (CategoricalInfo) info; 072 if (cInfo.getUniqueObservations() > CategoricalInfo.THRESHOLD) { 073 m.put(name,cInfo.generateRealInfo()); 074 } 075 } 076 } 077 078 /** 079 * Clears all the feature observations. 080 */ 081 public void clear() { 082 m.clear(); 083 } 084 085}