001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo;
018
019/**
020 * A feature map that can record new feature value observations.
021 */
022public class MutableFeatureMap extends FeatureMap {
023    private static final long serialVersionUID = 2L;
024
025    private final boolean convertHighCardinality;
026
027    /**
028     * Creates an empty feature map which converts high cardinality categorical variable infos into reals.
029     * <p>
030     * The conversion threshold is {@link CategoricalInfo#THRESHOLD}.
031     */
032    public MutableFeatureMap() {
033        this(true);
034    }
035
036    /**
037     * Creates an empty feature map which can optionally convert high cardinality categorical variable infos into reals.
038     * <p>
039     * The conversion threshold is {@link CategoricalInfo#THRESHOLD}.
040     * @param convertHighCardinality Should this feature map convert high cardinality categorical variables into real variables?
041     */
042    public MutableFeatureMap(boolean convertHighCardinality) {
043        super();
044        this.convertHighCardinality = convertHighCardinality;
045    }
046
047    /**
048     * Adds a variable info into the feature map.
049     * <p>
050     * Returns the old one if there was a name collision, otherwise returns null.
051     * @param info The info to add.
052     * @return The old variable info or null.
053     */
054    public VariableInfo put(VariableInfo info) {
055        VariableInfo old = m.put(info.getName(), info);
056        return old;
057    }
058
059    /**
060     * Adds an occurrence of a feature with a given name.
061     *
062     * @param name the name of the feature.
063     * @param value the observed value of that feature.
064     */
065    public void add(String name, double value) {
066        SkeletalVariableInfo info = (SkeletalVariableInfo) m.computeIfAbsent(name, CategoricalInfo::new);
067        info.observe(value);
068
069        // If there are too many categories, convert into a real info and drop the old categorical info.
070        if (convertHighCardinality && info instanceof CategoricalInfo) {
071            CategoricalInfo cInfo = (CategoricalInfo) info;
072            if (cInfo.getUniqueObservations() > CategoricalInfo.THRESHOLD) {
073                m.put(name,cInfo.generateRealInfo());
074            }
075        }
076    }
077
078    /**
079     * Clears all the feature observations.
080     */
081    public void clear() {
082        m.clear();
083    }
084
085}