/*
 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.tribuo.data.text.impl;

import com.oracle.labs.mlrg.olcut.config.Config;
import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import org.tribuo.Example;
import org.tribuo.Feature;
import org.tribuo.Output;
import org.tribuo.data.text.TextFeatureExtractor;
import org.tribuo.data.text.TextPipeline;
import org.tribuo.impl.ArrayExample;
import org.tribuo.impl.ListExample;

import java.util.List;

/**
 * An implementation of {@link TextFeatureExtractor} that takes a
 * {@link TextPipeline} and generates {@link ArrayExample}.
 *
 * @param <T> The output type carried by the generated examples.
 */
public class TextFeatureExtractorImpl<T extends Output<T>> implements TextFeatureExtractor<T> {

    @Config(mandatory=true,description="The text processing pipeline.")
    private TextPipeline pipeline;

    /**
     * for olcut
     */
    private TextFeatureExtractorImpl() {}

    /**
     * Creates a feature extractor that delegates text processing to the supplied pipeline.
     *
     * @param pipeline The text processing pipeline used by {@link #extract(Output, String)}.
     */
    public TextFeatureExtractorImpl(TextPipeline pipeline) {
        this.pipeline = pipeline;
    }

    /**
     * Describes this extractor together with its pipeline.
     *
     * @return A string of the form {@code TextFeatureExtractorImpl(pipeline=...)}.
     */
    @Override
    public String toString() {
        // Plain concatenation rather than pipeline.toString(): the no-arg
        // constructor leaves the field unset, and concatenation renders a
        // null reference as "null" instead of throwing.
        return "TextFeatureExtractorImpl(pipeline=" + pipeline + ")";
    }

    /**
     * Runs the pipeline over the supplied text and wraps the resulting
     * features in a new {@link ArrayExample} carrying the given label.
     *
     * @param label The output to attach to the example.
     * @param data The text to process.
     * @return An example containing the label and the features produced by the pipeline.
     */
    @Override
    public Example<T> extract(T label, String data) {
        ArrayExample<T> example = new ArrayExample<>(label);
        // The pipeline's first argument is passed as the empty string.
        List<Feature> features = pipeline.process("", data);
        example.addAll(features);
        return example;
    }

    /**
     * Builds the provenance for this extractor.
     *
     * @return A {@link ConfiguredObjectProvenanceImpl} constructed from this
     *         instance and the string {@code "TextFeatureExtractor"}.
     */
    @Override
    public ConfiguredObjectProvenance getProvenance() {
        return new ConfiguredObjectProvenanceImpl(this, "TextFeatureExtractor");
    }
}