001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.data.text.impl;
018
019import com.oracle.labs.mlrg.olcut.config.Config;
020import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
021import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
022import org.tribuo.Example;
023import org.tribuo.Feature;
024import org.tribuo.Output;
025import org.tribuo.data.text.TextFeatureExtractor;
026import org.tribuo.data.text.TextPipeline;
027import org.tribuo.impl.ArrayExample;
028import org.tribuo.impl.ListExample;
029
030import java.util.List;
031
032/**
033 * An implementation of {@link TextFeatureExtractor} that takes a
034 * {@link TextPipeline} and generates {@link ListExample}.
035 */
036public class TextFeatureExtractorImpl<T extends Output<T>> implements TextFeatureExtractor<T> {
037
038    @Config(mandatory=true,description="The text processing pipeline.")
039    private TextPipeline pipeline;
040
041    /**
042     * for olcut
043     */
044    private TextFeatureExtractorImpl() {}
045    
046    public TextFeatureExtractorImpl(TextPipeline pipeline) {
047        this.pipeline = pipeline;
048    }
049
050    @Override
051    public String toString() {
052        return "TextFeatureExtractorImpl(pipeline="+pipeline.toString()+")";
053    }
054
055    @Override
056    public Example<T> extract(T label, String data) {
057        ArrayExample<T> example = new ArrayExample<>(label);
058        List<Feature> features = pipeline.process("",data);
059
060        example.addAll(features);
061
062        return example;
063    }
064
065    @Override
066    public ConfiguredObjectProvenance getProvenance() {
067        return new ConfiguredObjectProvenanceImpl(this,"TextFeatureExtractor");
068    }
069}