001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.math.la;
018
019import org.tribuo.math.util.VectorNormalizer;
020
021/**
022 * Interface for 1 dimensional {@link Tensor}s.
023 * <p>
024 * Vectors have immutable sizes and immutable indices (so {@link SparseVector} can't grow).
025 */
026public interface SGDVector extends Tensor, Iterable<VectorTuple> {
027
028    /**
029     * Returns a deep copy of this vector.
030     * @return A copy of this vector.
031     */
032    public SGDVector copy();
033
034    /**
035     * Returns the dimensionality of this vector.
036     * @return The dimensionality of the vector.
037     */
038    public int size();
039
040    /**
041     * Returns the number of non-zero elements (on construction, an element
042     * could be set to zero and it would still remain active).
043     * @return The number of non-zero elements.
044     */
045    public int numActiveElements();
046
047    /**
048     * Generates a new vector with each element scaled by {@code coefficient}.
049     * @param coefficient The coefficient to scale the elements by.
050     * @return A new {@link SGDVector}.
051     */
052    public SGDVector scale(double coefficient);
053
054    /**
055     * Adds {@code value} to the element at {@code index}.
056     * @param index The index to update.
057     * @param value The value to add.
058     */
059    public void add(int index, double value);
060
061    /**
062     * Adds {@code other} to this vector, producing a new {@link SGDVector}.
063     * Adding Dense to Dense/Sparse produces a {@link DenseVector}, adding Sparse to
064     * Sparse produces a {@link SparseVector}.
065     * @param other The vector to add.
066     * @return A new {@link SGDVector} where each element value = this.get(i) + other.get(i).
067     */
068    public SGDVector add(SGDVector other);
069
070    /**
071     * Subtracts {@code other} from this vector, producing a new {@link SGDVector}.
072     * Subtracting Dense from Dense/Sparse produces a {@link DenseVector}, subtracting Sparse from
073     * Sparse produces a {@link SparseVector}.
074     * @param other The vector to subtract.
075     * @return A new {@link SGDVector} where each element value = this.get(i) - other.get(i).
076     */
077    public SGDVector subtract(SGDVector other);
078
079    /**
080     * Calculates the dot product between this vector and {@code other}.
081     * @param other The other vector.
082     * @return The dot product.
083     */
084    public double dot(SGDVector other);
085
086    /**
087     * Generates the matrix representing the outer product between the two vectors.
088     * @param other Another {@link SGDVector}
089     * @return The outer product {@link Matrix}.
090     */
091    public Matrix outer(SGDVector other);
092
093    /**
094     * Calculates the sum of this vector.
095     * @return The sum.
096     */
097    public double sum();
098
099    /**
100     * Calculates the euclidean norm for this vector.
101     * @return The euclidean norm.
102     */
103    @Override
104    public double twoNorm();
105
106    /**
107     * Calculates the Manhattan norm for this vector.
108     * @return The Manhattan norm.
109     */
110    public double oneNorm();
111
112    /**
113     * Gets an element from this vector.
114     * @param index The index of the element.
115     * @return The value at that index.
116     */
117    public double get(int index);
118
119    /**
120     * Sets the {@code index} to the {@code value}.
121     * @param index The index to set.
122     * @param value The value to set it to.
123     */
124    public void set(int index, double value);
125
126    /**
127     * Returns the index of the maximum value. Requires probing the array.
128     * @return The index of the maximum value.
129     */
130    public int indexOfMax();
131
132    /**
133     * Returns the maximum value. Requires probing the array.
134     * @return The maximum value.
135     */
136    public double maxValue();
137
138    /**
139     * Returns the minimum value. Requires probing the array.
140     * @return The minimum value.
141     */
142    public double minValue();
143
144    /**
145     * Normalizes the vector using the supplied vector normalizer.
146     * @param normalizer The kind of normalization to apply.
147     */
148    public void normalize(VectorNormalizer normalizer);
149
150    /**
151     * Synonym for euclideanDistance.
152     * @param other The other vector.
153     * @return The l2 norm of the difference between the two vectors.
154     */
155    default public double l2Distance(SGDVector other) {
156        return euclideanDistance(other);
157    }
158
159    /**
160     * The l2 or euclidean distance between this vector and the other vector.
161     * @param other The other vector.
162     * @return The euclidean distance between them.
163     */
164    public double euclideanDistance(SGDVector other);
165
166    /**
167     * The l1 or Manhattan distance between this vector and the other vector.
168     * @param other The other vector.
169     * @return The l1 distance.
170     */
171    public double l1Distance(SGDVector other);
172
173    /**
174     * Calculates the cosine distance of two vectors.
175     * 1 - cos(x,y)
176     * @param other The other vector.
177     * @return 1 - cosine similarity (this,other)
178     */
179    default public double cosineDistance(SGDVector other) {
180        return 1 - cosineSimilarity(other);
181    }
182
183    /**
184     * Calculates the cosine similarity of two vectors.
185     * cos(x,y) = dot(x,y) / (norm(x) * norm(y))
186     * @param other The other vector.
187     * @return cosine similarity (this,other)
188     */
189    default public double cosineSimilarity(SGDVector other) {
190        double numerator = dot(other);
191        double output = 0.0;
192        if (numerator != 0.0) {
193            output = numerator / (twoNorm() * other.twoNorm());
194        }
195        return output;
196    }
197
198    /**
199     * Calculates the variance of this vector.
200     * @return The variance of the vector.
201     */
202    default public double variance() {
203        double mean = sum() / size();
204        return variance(mean);
205    }
206
207    /**
208     * Calculates the variance of this vector based on the supplied mean.
209     * @param mean The mean of the vector.
210     * @return The variance of the vector.
211     */
212    public double variance(double mean);
213}