001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.util.tokens.options;
018
019import com.oracle.labs.mlrg.olcut.config.Option;
020import org.tribuo.util.tokens.Tokenizer;
021import org.tribuo.util.tokens.impl.BreakIteratorTokenizer;
022
023import java.util.Locale;
024
025/**
026 * CLI options for a {@link BreakIteratorTokenizer}.
027 */
028public class BreakIteratorTokenizerOptions implements TokenizerOptions {
029
030    @Option(longName = "bi-tokenizer-language-tag", usage = "BreakIteratorTokenizer - The language tag of the locale to be used.")
031    public String languageTag;
032
033    @Override
034    public Tokenizer getTokenizer() {
035        return new BreakIteratorTokenizer(Locale.forLanguageTag(languageTag));
036    }
037}