/*
 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.tribuo.data.text.impl;

import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import org.tribuo.Feature;
import org.tribuo.data.text.FeatureAggregator;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * A feature aggregator that averages feature values across a feature list.
031 */ 032public class AverageAggregator implements FeatureAggregator { 033 034 private final ThreadLocal<Map<String,Double>> map = ThreadLocal.withInitial(HashMap::new); 035 private final ThreadLocal<Map<String,Integer>> countMap = ThreadLocal.withInitial(HashMap::new); 036 037 @Override 038 public List<Feature> aggregate(List<Feature> input) { 039 Map<String,Double> curMap = map.get(); 040 Map<String,Integer> curCountMap = countMap.get(); 041 curMap.clear(); 042 curCountMap.clear(); 043 044 for (Feature f : input) { 045 double curValue = f.getValue(); 046 curMap.merge(f.getName(),curValue,Double::sum); 047 curCountMap.merge(f.getName(),1,Integer::sum); 048 } 049 050 List<Feature> features = new ArrayList<>(); 051 052 for (Map.Entry<String,Double> e : curMap.entrySet()) { 053 int count = curCountMap.get(e.getKey()); 054 features.add(new Feature(e.getKey(),e.getValue()/count)); 055 } 056 057 return features; 058 } 059 060 @Override 061 public ConfiguredObjectProvenance getProvenance() { 062 return new ConfiguredObjectProvenanceImpl(this,"FeatureAggregator"); 063 } 064}