001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo.util.tokens.options; 018 019import com.oracle.labs.mlrg.olcut.config.Option; 020import org.tribuo.util.tokens.Tokenizer; 021import org.tribuo.util.tokens.impl.SplitCharactersTokenizer; 022 023/** 024 * CLI options for a {@link SplitCharactersTokenizer}. 025 */ 026public class SplitCharactersTokenizerOptions implements TokenizerOptions { 027 028 @Option(longName = "sc-tokenizer-split-characters", usage = "The characters to split on.") 029 public char[] splitChars = SplitCharactersTokenizer.DEFAULT_SPLIT_CHARACTERS; 030 ; 031 032 @Option(longName = "sc-tokenizer-split-x-digits", usage = "Characters to split on unless they appear between digits") 033 public char[] splitXDigitsChars = SplitCharactersTokenizer.DEFAULT_SPLIT_EXCEPTING_IN_DIGITS_CHARACTERS; 034 035 @Override 036 public Tokenizer getTokenizer() { 037 return new SplitCharactersTokenizer(splitChars, splitXDigitsChars); 038 } 039 040}