001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.clustering.evaluation;
018
019import org.tribuo.clustering.ClusterID;
020import org.tribuo.evaluation.Evaluation;
021
022/**
023 * An {@link Evaluation} for clustering tasks.
024 */
025public interface ClusteringEvaluation extends Evaluation<ClusterID> {
026
027    /**
028     * Calculates the normalized MI between the ground truth clustering ids and the predicted ones.
029     * <p>
030     * The value is bounded between 0 and 1.
031     * <p>
032     * If this value is 1, then the predicted id values are a permutation of the supplied ids.
033     * If the value is 0 then the predicted ids are random wrt the supplied ids.
034     * @return The normalized MI.
035     */
036    double normalizedMI();
037
038    /**
039     * Measures the adjusted normalized mutual information between the predicted ids and the supplied ids.
040     * <p>
041     * The value is bounded between 0 and 1.
042     * <p>
043     * If this value is 1, then the predicted id values are a permutation of the supplied ids.
044     * If the value is 0 then the predicted ids are random wrt the supplied ids.
045     * <p>
046     * It's adjusted for chance unlike the normalized one.
047     * @return The adjusted MI.
048     */
049    double adjustedMI();
050
051}