/*
 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.tribuo.util.tokens.impl;

import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
import org.tribuo.util.tokens.Token;
import org.tribuo.util.tokens.Tokenizer;

/**
 * A convenience class for when you are required to provide a tokenizer but you
 * don't actually want to split up the text into tokens. This tokenizer will
 * serve up a single "token" corresponding to the input text.
028 */ 029public class NonTokenizer implements Tokenizer { 030 031 private CharSequence cs; 032 033 private boolean done = false; 034 035 public NonTokenizer() { } 036 037 @Override 038 public ConfiguredObjectProvenance getProvenance() { 039 return new ConfiguredObjectProvenanceImpl(this, "Tokenizer"); 040 } 041 042 @Override 043 public void reset(CharSequence cs) { 044 this.cs = cs; 045 this.done = false; 046 } 047 048 @Override 049 public boolean advance() { 050 if (cs == null) { 051 throw new IllegalStateException("NonTokenizer has not been reset."); 052 } 053 if (!done) { 054 done = true; 055 return true; 056 } 057 return false; 058 } 059 060 @Override 061 public String getText() { 062 if (done) { 063 return cs.toString(); 064 } else { 065 throw new IllegalStateException("NonTokenizer isn't ready."); 066 } 067 } 068 069 @Override 070 public int getStart() { 071 if (done) { 072 return 0; 073 } else { 074 throw new IllegalStateException("NonTokenizer isn't ready."); 075 } 076 } 077 078 @Override 079 public int getEnd() { 080 if (done) { 081 return cs.length(); 082 } else { 083 throw new IllegalStateException("NonTokenizer isn't ready."); 084 } 085 } 086 087 @Override 088 public Token.TokenType getType() { 089 if (done) { 090 return Token.TokenType.WORD; 091 } else { 092 throw new IllegalStateException("NonTokenizer isn't ready."); 093 } 094 } 095 096 @Override 097 public NonTokenizer clone() { 098 try { 099 NonTokenizer copy = (NonTokenizer) super.clone(); 100 copy.done = false; 101 copy.cs = null; 102 return copy; 103 } catch (CloneNotSupportedException e) { 104 throw new Error("Assertion error, NonTokenizer is Cloneable."); 105 } 106 } 107 108}