001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo.data.columnar.extractors; 018 019import com.oracle.labs.mlrg.olcut.config.Config; 020import org.tribuo.data.columnar.ColumnarIterator; 021import org.tribuo.data.columnar.FieldExtractor; 022 023import java.util.Optional; 024import java.util.logging.Logger; 025 026/** 027 * Extracts a value from a single field to be placed in an {@link org.tribuo.Example}'s metadata field. 028 */ 029public abstract class SimpleFieldExtractor<T> implements FieldExtractor<T> { 030 031 private static final Logger logger = Logger.getLogger(SimpleFieldExtractor.class.getName()); 032 033 @Config(mandatory = true,description="The field name to read.") 034 protected String fieldName; 035 036 @Config(description="The metadata key to emit, defaults to field name if unpopulated") 037 protected String metadataName; 038 039 /** 040 * Constructs a simple field extractor which reads from the supplied field name and 041 * writes out to a metadata field with the same name. 042 * @param fieldName The field name to read from. 043 */ 044 protected SimpleFieldExtractor(String fieldName) { 045 this(fieldName, fieldName); 046 } 047 048 /** 049 * Constructs a simple field extractor with the supplied field name and metadata field name. 050 * @param fieldName The field name to read. 051 * @param metadataName The metadata field name to write to. 052 */ 053 protected SimpleFieldExtractor(String fieldName, String metadataName) { 054 this.fieldName = fieldName; 055 this.metadataName = metadataName; 056 } 057 058 /** 059 * For olcut. 060 */ 061 protected SimpleFieldExtractor() {} 062 063 /** 064 * Used by the OLCUT configuration system, and should not be called by external code. 065 */ 066 @Override 067 public void postConfig() { 068 if (metadataName == null || metadataName.isEmpty()) { 069 metadataName = fieldName; 070 } 071 } 072 073 /** 074 * Gets the field name this extractor operates on. 075 * @return The field name. 076 */ 077 public String getFieldName() { 078 return fieldName; 079 } 080 081 /** 082 * Gets the metadata key name. This is the key into which this value will be written in an {@link org.tribuo.Example} 083 * if it is given to {@link org.tribuo.data.columnar.RowProcessor}. 084 * <p> 085 * Defaults to the field name. 086 * @return The metadata key name. 087 */ 088 @Override 089 public String getMetadataName() { 090 return metadataName; 091 } 092 093 /** 094 * Extracts the field value, or returns {@link Optional#empty} if it failed to parse. 095 * @param fieldValue The field value to read. 096 * @return The extracted value. 097 */ 098 protected abstract Optional<T> extractField(String fieldValue); 099 100 @Override 101 public Optional<T> extract(ColumnarIterator.Row row) { 102 if(row.getRowData().containsKey(getFieldName())) { 103 return extractField(row.getRowData().get(getFieldName())); 104 } else { 105 logger.warning("Row was missing expected field " + getFieldName()); 106 return Optional.empty(); 107 } 108 } 109 110 @Override 111 public String toString() { 112 return this.getClass().getSimpleName() + "(fieldName=" + fieldName + ", metadataName=" + metadataName + ")"; 113 } 114}