001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.data.columnar;
018
019import org.tribuo.Example;
020import org.tribuo.Feature;
021
022/**
023 * A Feature with extra bookkeeping for use inside the columnar package.
024 * <p>
025 * {@link Example}s may destroy and recreate Feature instances so don't
026 * expect ColumnarFeatures to still be ColumnarFeatures if you probe the
027 * Example after construction.
028 */
029public class ColumnarFeature extends Feature {
030    private static final long serialVersionUID = 1L;
031
032    public static final String CONJUNCTION = "CONJ";
033
034    public static final String JOINER = "@";
035
036    private final String fieldName;
037
038    private final String firstFieldName;
039
040    private final String secondFieldName;
041
042    private final String columnEntry;
043
044    /**
045     * Constructs a {@code ColumnarFeature} from the field name, column entry and value.
046     * @param fieldName The field name.
047     * @param columnEntry The name of the extracted value from the field.
048     * @param value The feature value.
049     */
050    public ColumnarFeature(String fieldName, String columnEntry, double value) {
051        super(generateFeatureName(fieldName,columnEntry), value);
052        this.fieldName = fieldName;
053        this.columnEntry = columnEntry;
054        this.firstFieldName = "";
055        this.secondFieldName = "";
056    }
057
058    /**
059     * Constructs a {@code ColumnarFeature} which is the conjunction of features from two fields.
060     * @param firstFieldName The first field name.
061     * @param secondFieldName The second field name.
062     * @param columnEntry The name of the extracted value from the field.
063     * @param value The feature value.
064     */
065    public ColumnarFeature(String firstFieldName, String secondFieldName, String columnEntry, double value) {
066        super(generateFeatureName(firstFieldName,secondFieldName,columnEntry),value);
067        this.fieldName = CONJUNCTION;
068        this.columnEntry = columnEntry;
069        this.firstFieldName = firstFieldName;
070        this.secondFieldName = secondFieldName;
071    }
072
073    /**
074     * Generates a feature name based on the field name.
075     * <p>
076     * Uses {@link ColumnarFeature#JOINER} to join the strings.
077     * @param fieldName The field name.
078     * @param name The name of the extracted feature.
079     * @return The new feature name.
080     */
081    public static String generateFeatureName(String fieldName, String name) {
082        return fieldName + JOINER + name;
083    }
084
085    /**
086     * Generates a feature name used for conjunction features.
087     * <p>
088     * Uses {@link ColumnarFeature#JOINER} to join the strings and {@link ColumnarFeature#CONJUNCTION} to prepend the name.
089     * @param firstFieldName The name of the first field.
090     * @param secondFieldName The name of the second field.
091     * @param name The name of the extracted feature.
092     * @return The new feature name.
093     */
094    public static String generateFeatureName(String firstFieldName, String secondFieldName, String name) {
095        return CONJUNCTION + "[" + firstFieldName + "," + secondFieldName + "]" + JOINER + name;
096    }
097
098    /**
099     * Gets the field name. Returns {@link ColumnarFeature#CONJUNCTION} if it's a conjunction.
100     * @return The field name.
101     */
102    public String getFieldName() {
103        return fieldName;
104    }
105
106    /**
107     * If it's a conjunction feature, return the first field name.
108     * Otherwise return an empty String.
109     * @return The first field name, or an empty string.
110     */
111    public String getFirstFieldName() {
112        return firstFieldName;
113    }
114
115    /**
116     * If it's a conjunction feature, return the second field name.
117     * Otherwise return an empty String.
118     * @return The second field name, or an empty string.
119     */
120    public String getSecondFieldName() {
121        return secondFieldName;
122    }
123
124    /**
125     * Gets the columnEntry (i.e., the feature name produced by the {@link FieldExtractor}
126     * without the fieldName).
127     * @return The feature's column entry.
128     */
129    public String getColumnEntry() {
130        return columnEntry;
131    }
132}