001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.data.columnar.extractors;
018
019import com.oracle.labs.mlrg.olcut.config.Config;
020import org.tribuo.data.columnar.ColumnarIterator;
021import org.tribuo.data.columnar.FieldExtractor;
022
023import java.util.Optional;
024import java.util.logging.Logger;
025
026/**
027 * Extracts a value from a single field to be placed in an {@link org.tribuo.Example}'s metadata field.
028 */
029public abstract class SimpleFieldExtractor<T> implements FieldExtractor<T> {
030
031    private static final Logger logger = Logger.getLogger(SimpleFieldExtractor.class.getName());
032
033    @Config(mandatory = true,description="The field name to read.")
034    protected String fieldName;
035
036    @Config(description="The metadata key to emit, defaults to field name if unpopulated")
037    protected String metadataName;
038
039    /**
040     * Constructs a simple field extractor which reads from the supplied field name and
041     * writes out to a metadata field with the same name.
042     * @param fieldName The field name to read from.
043     */
044    protected SimpleFieldExtractor(String fieldName) {
045        this(fieldName, fieldName);
046    }
047
048    /**
049     * Constructs a simple field extractor with the supplied field name and metadata field name.
050     * @param fieldName The field name to read.
051     * @param metadataName The metadata field name to write to.
052     */
053    protected SimpleFieldExtractor(String fieldName, String metadataName) {
054        this.fieldName = fieldName;
055        this.metadataName = metadataName;
056    }
057
058    /**
059     * For olcut.
060     */
061    protected SimpleFieldExtractor() {}
062
063    /**
064     * Used by the OLCUT configuration system, and should not be called by external code.
065     */
066    @Override
067    public void postConfig() {
068        if (metadataName == null || metadataName.isEmpty()) {
069            metadataName = fieldName;
070        }
071    }
072
073    /**
074     * Gets the field name this extractor operates on.
075     * @return The field name.
076     */
077    public String getFieldName() {
078        return fieldName;
079    }
080
081    /**
082     * Gets the metadata key name. This is the key into which this value will be written in an {@link org.tribuo.Example}
083     * if it is given to {@link org.tribuo.data.columnar.RowProcessor}.
084     * <p>
085     * Defaults to the field name.
086     * @return The metadata key name.
087     */
088    @Override
089    public String getMetadataName() {
090        return metadataName;
091    }
092
093    /**
094     * Extracts the field value, or returns {@link Optional#empty} if it failed to parse.
095     * @param fieldValue The field value to read.
096     * @return The extracted value.
097     */
098    protected abstract Optional<T> extractField(String fieldValue);
099
100    @Override
101    public Optional<T> extract(ColumnarIterator.Row row) {
102        if(row.getRowData().containsKey(getFieldName())) {
103            return extractField(row.getRowData().get(getFieldName()));
104        } else {
105            logger.warning("Row was missing expected field " + getFieldName());
106            return Optional.empty();
107        }
108    }
109
110    @Override
111    public String toString() {
112        return this.getClass().getSimpleName() + "(fieldName=" + fieldName + ", metadataName=" + metadataName + ")";
113    }
114}