001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo.data.text.impl; 018 019import com.oracle.labs.mlrg.olcut.config.Config; 020import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance; 021import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl; 022import org.tribuo.Feature; 023import org.tribuo.data.text.FeatureAggregator; 024 025import java.util.ArrayList; 026import java.util.HashMap; 027import java.util.List; 028import java.util.Map; 029 030/** 031 * Aggregates feature tokens, generating unique features. 
032 */ 033public class UniqueAggregator implements FeatureAggregator { 034 035 private final ThreadLocal<Map<String,Double>> map = ThreadLocal.withInitial(HashMap::new); 036 037 @Config(description="Value to emit, if unset emits the last value observed for that token.") 038 private double value = Double.NaN; 039 040 public UniqueAggregator(double value) { 041 this.value = value; 042 } 043 044 public UniqueAggregator() { } 045 046 @Override 047 public List<Feature> aggregate(List<Feature> input) { 048 Map<String,Double> curMap = map.get(); 049 curMap.clear(); 050 051 for (Feature f : input) { 052 curMap.put(f.getName(),f.getValue()); 053 } 054 055 List<Feature> features = new ArrayList<>(); 056 057 for (Map.Entry<String,Double> e : curMap.entrySet()) { 058 double tmpValue; 059 if (Double.isNaN(value)) { 060 tmpValue = e.getValue(); 061 } else { 062 tmpValue = value; 063 } 064 features.add(new Feature(e.getKey(),tmpValue)); 065 } 066 067 return features; 068 } 069 070 @Override 071 public ConfiguredObjectProvenance getProvenance() { 072 return new ConfiguredObjectProvenanceImpl(this,"FeatureAggregator"); 073 } 074}