001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo.math.la; 018 019import org.tribuo.math.util.VectorNormalizer; 020 021/** 022 * Interface for 1 dimensional {@link Tensor}s. 023 * <p> 024 * Vectors have immutable sizes and immutable indices (so {@link SparseVector} can't grow). 025 */ 026public interface SGDVector extends Tensor, Iterable<VectorTuple> { 027 028 /** 029 * Returns a deep copy of this vector. 030 * @return A copy of this vector. 031 */ 032 public SGDVector copy(); 033 034 /** 035 * Returns the dimensionality of this vector. 036 * @return The dimensionality of the vector. 037 */ 038 public int size(); 039 040 /** 041 * Returns the number of non-zero elements (on construction, an element 042 * could be set to zero and it would still remain active). 043 * @return The number of non-zero elements. 044 */ 045 public int numActiveElements(); 046 047 /** 048 * Generates a new vector with each element scaled by {@code coefficient}. 049 * @param coefficient The coefficient to scale the elements by. 050 * @return A new {@link SGDVector}. 051 */ 052 public SGDVector scale(double coefficient); 053 054 /** 055 * Adds {@code value} to the element at {@code index}. 056 * @param index The index to update. 057 * @param value The value to add. 058 */ 059 public void add(int index, double value); 060 061 /** 062 * Adds {@code other} to this vector, producing a new {@link SGDVector}. 063 * Adding Dense to Dense/Sparse produces a {@link DenseVector}, adding Sparse to 064 * Sparse produces a {@link SparseVector}. 065 * @param other The vector to add. 066 * @return A new {@link SGDVector} where each element value = this.get(i) + other.get(i). 067 */ 068 public SGDVector add(SGDVector other); 069 070 /** 071 * Subtracts {@code other} from this vector, producing a new {@link SGDVector}. 072 * Subtracting Dense from Dense/Sparse produces a {@link DenseVector}, subtracting Sparse from 073 * Sparse produces a {@link SparseVector}. 074 * @param other The vector to subtract. 075 * @return A new {@link SGDVector} where each element value = this.get(i) - other.get(i). 076 */ 077 public SGDVector subtract(SGDVector other); 078 079 /** 080 * Calculates the dot product between this vector and {@code other}. 081 * @param other The other vector. 082 * @return The dot product. 083 */ 084 public double dot(SGDVector other); 085 086 /** 087 * Generates the matrix representing the outer product between the two vectors. 088 * @param other Another {@link SGDVector} 089 * @return The outer product {@link Matrix}. 090 */ 091 public Matrix outer(SGDVector other); 092 093 /** 094 * Calculates the sum of this vector. 095 * @return The sum. 096 */ 097 public double sum(); 098 099 /** 100 * Calculates the euclidean norm for this vector. 101 * @return The euclidean norm. 102 */ 103 @Override 104 public double twoNorm(); 105 106 /** 107 * Calculates the Manhattan norm for this vector. 108 * @return The Manhattan norm. 109 */ 110 public double oneNorm(); 111 112 /** 113 * Gets an element from this vector. 114 * @param index The index of the element. 115 * @return The value at that index. 116 */ 117 public double get(int index); 118 119 /** 120 * Sets the {@code index} to the {@code value}. 121 * @param index The index to set. 122 * @param value The value to set it to. 123 */ 124 public void set(int index, double value); 125 126 /** 127 * Returns the index of the maximum value. Requires probing the array. 128 * @return The index of the maximum value. 129 */ 130 public int indexOfMax(); 131 132 /** 133 * Returns the maximum value. Requires probing the array. 134 * @return The maximum value. 135 */ 136 public double maxValue(); 137 138 /** 139 * Returns the minimum value. Requires probing the array. 140 * @return The minimum value. 141 */ 142 public double minValue(); 143 144 /** 145 * Normalizes the vector using the supplied vector normalizer. 146 * @param normalizer The kind of normalization to apply. 147 */ 148 public void normalize(VectorNormalizer normalizer); 149 150 /** 151 * Synonym for euclideanDistance. 152 * @param other The other vector. 153 * @return The l2 norm of the difference between the two vectors. 154 */ 155 default public double l2Distance(SGDVector other) { 156 return euclideanDistance(other); 157 } 158 159 /** 160 * The l2 or euclidean distance between this vector and the other vector. 161 * @param other The other vector. 162 * @return The euclidean distance between them. 163 */ 164 public double euclideanDistance(SGDVector other); 165 166 /** 167 * The l1 or Manhattan distance between this vector and the other vector. 168 * @param other The other vector. 169 * @return The l1 distance. 170 */ 171 public double l1Distance(SGDVector other); 172 173 /** 174 * Calculates the cosine distance of two vectors. 175 * 1 - cos(x,y) 176 * @param other The other vector. 177 * @return 1 - cosine similarity (this,other) 178 */ 179 default public double cosineDistance(SGDVector other) { 180 return 1 - cosineSimilarity(other); 181 } 182 183 /** 184 * Calculates the cosine similarity of two vectors. 185 * cos(x,y) = dot(x,y) / (norm(x) * norm(y)) 186 * @param other The other vector. 187 * @return cosine similarity (this,other) 188 */ 189 default public double cosineSimilarity(SGDVector other) { 190 double numerator = dot(other); 191 double output = 0.0; 192 if (numerator != 0.0) { 193 output = numerator / (twoNorm() * other.twoNorm()); 194 } 195 return output; 196 } 197 198 /** 199 * Calculates the variance of this vector. 200 * @return The variance of the vector. 201 */ 202 default public double variance() { 203 double mean = sum() / size(); 204 return variance(mean); 205 } 206 207 /** 208 * Calculates the variance of this vector based on the supplied mean. 209 * @param mean The mean of the vector. 210 * @return The variance of the vector. 211 */ 212 public double variance(double mean); 213}