001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo.data.columnar; 018 019import org.tribuo.Example; 020import org.tribuo.Feature; 021 022/** 023 * A Feature with extra bookkeeping for use inside the columnar package. 024 * <p> 025 * {@link Example}s may destroy and recreate Feature instances so don't 026 * expect ColumnarFeatures to still be ColumnarFeatures if you probe the 027 * Example after construction. 028 */ 029public class ColumnarFeature extends Feature { 030 private static final long serialVersionUID = 1L; 031 032 public static final String CONJUNCTION = "CONJ"; 033 034 public static final String JOINER = "@"; 035 036 private final String fieldName; 037 038 private final String firstFieldName; 039 040 private final String secondFieldName; 041 042 private final String columnEntry; 043 044 /** 045 * Constructs a {@code ColumnarFeature} from the field name, column entry and value. 046 * @param fieldName The field name. 047 * @param columnEntry The name of the extracted value from the field. 048 * @param value The feature value. 049 */ 050 public ColumnarFeature(String fieldName, String columnEntry, double value) { 051 super(generateFeatureName(fieldName,columnEntry), value); 052 this.fieldName = fieldName; 053 this.columnEntry = columnEntry; 054 this.firstFieldName = ""; 055 this.secondFieldName = ""; 056 } 057 058 /** 059 * Constructs a {@code ColumnarFeature} which is the conjunction of features from two fields. 060 * @param firstFieldName The first field name. 061 * @param secondFieldName The second field name. 062 * @param columnEntry The name of the extracted value from the field. 063 * @param value The feature value. 064 */ 065 public ColumnarFeature(String firstFieldName, String secondFieldName, String columnEntry, double value) { 066 super(generateFeatureName(firstFieldName,secondFieldName,columnEntry),value); 067 this.fieldName = CONJUNCTION; 068 this.columnEntry = columnEntry; 069 this.firstFieldName = firstFieldName; 070 this.secondFieldName = secondFieldName; 071 } 072 073 /** 074 * Generates a feature name based on the field name. 075 * <p> 076 * Uses {@link ColumnarFeature#JOINER} to join the strings. 077 * @param fieldName The field name. 078 * @param name The name of the extracted feature. 079 * @return The new feature name. 080 */ 081 public static String generateFeatureName(String fieldName, String name) { 082 return fieldName + JOINER + name; 083 } 084 085 /** 086 * Generates a feature name used for conjunction features. 087 * <p> 088 * Uses {@link ColumnarFeature#JOINER} to join the strings and {@link ColumnarFeature#CONJUNCTION} to prepend the name. 089 * @param firstFieldName The name of the first field. 090 * @param secondFieldName The name of the second field. 091 * @param name The name of the extracted feature. 092 * @return The new feature name. 093 */ 094 public static String generateFeatureName(String firstFieldName, String secondFieldName, String name) { 095 return CONJUNCTION + "[" + firstFieldName + "," + secondFieldName + "]" + JOINER + name; 096 } 097 098 /** 099 * Gets the field name. Returns {@link ColumnarFeature#CONJUNCTION} if it's a conjunction. 100 * @return The field name. 101 */ 102 public String getFieldName() { 103 return fieldName; 104 } 105 106 /** 107 * If it's a conjunction feature, return the first field name. 108 * Otherwise return an empty String. 109 * @return The first field name, or an empty string. 110 */ 111 public String getFirstFieldName() { 112 return firstFieldName; 113 } 114 115 /** 116 * If it's a conjunction feature, return the second field name. 117 * Otherwise return an empty String. 118 * @return The second field name, or an empty string. 119 */ 120 public String getSecondFieldName() { 121 return secondFieldName; 122 } 123 124 /** 125 * Gets the columnEntry (i.e., the feature name produced by the {@link FieldExtractor} 126 * without the fieldName). 127 * @return The feature's column entry. 128 */ 129 public String getColumnEntry() { 130 return columnEntry; 131 } 132}