001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.data.text.impl;
018
019import com.oracle.labs.mlrg.olcut.config.Config;
020import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
021import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
022import org.tribuo.Feature;
023import org.tribuo.data.text.FeatureAggregator;
024
025import java.util.ArrayList;
026import java.util.HashMap;
027import java.util.List;
028import java.util.Map;
029
030/**
031 * Aggregates feature tokens, generating unique features.
032 */
033public class UniqueAggregator implements FeatureAggregator {
034
035    private final ThreadLocal<Map<String,Double>> map = ThreadLocal.withInitial(HashMap::new);
036
037    @Config(description="Value to emit, if unset emits the last value observed for that token.")
038    private double value = Double.NaN;
039
040    public UniqueAggregator(double value) {
041        this.value = value;
042    }
043
044    public UniqueAggregator() { }
045
046    @Override
047    public List<Feature> aggregate(List<Feature> input) {
048        Map<String,Double> curMap = map.get();
049        curMap.clear();
050
051        for (Feature f : input) {
052            curMap.put(f.getName(),f.getValue());
053        }
054
055        List<Feature> features = new ArrayList<>();
056
057        for (Map.Entry<String,Double> e : curMap.entrySet()) {
058            double tmpValue;
059            if (Double.isNaN(value)) {
060                tmpValue = e.getValue();
061            } else {
062                tmpValue = value;
063            }
064            features.add(new Feature(e.getKey(),tmpValue));
065        }
066
067        return features;
068    }
069
070    @Override
071    public ConfiguredObjectProvenance getProvenance() {
072        return new ConfiguredObjectProvenanceImpl(this,"FeatureAggregator");
073    }
074}