001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.util.tokens.impl;
018
019import com.oracle.labs.mlrg.olcut.provenance.ConfiguredObjectProvenance;
020import com.oracle.labs.mlrg.olcut.provenance.impl.ConfiguredObjectProvenanceImpl;
021import org.tribuo.util.tokens.Token;
022import org.tribuo.util.tokens.Tokenizer;
023
024/**
025 * A convenience class for when you are required to provide a tokenizer but you
026 * don't actually want to split up the text into tokens.  This tokenizer will
027 * serve up a single "token" corresponding to the input text.
028 */
029public class NonTokenizer implements Tokenizer {
030
031    private CharSequence cs;
032
033    private boolean done = false;
034
035    public NonTokenizer() { }
036
037    @Override
038    public ConfiguredObjectProvenance getProvenance() {
039        return new ConfiguredObjectProvenanceImpl(this, "Tokenizer");
040    }
041
042    @Override
043    public void reset(CharSequence cs) {
044        this.cs = cs;
045        this.done = false;
046    }
047
048    @Override
049    public boolean advance() {
050        if (cs == null) {
051            throw new IllegalStateException("NonTokenizer has not been reset.");
052        }
053        if (!done) {
054            done = true;
055            return true;
056        }
057        return false;
058    }
059
060    @Override
061    public String getText() {
062        if (done) {
063            return cs.toString();
064        } else {
065            throw new IllegalStateException("NonTokenizer isn't ready.");
066        }
067    }
068
069    @Override
070    public int getStart() {
071        if (done) {
072            return 0;
073        } else {
074            throw new IllegalStateException("NonTokenizer isn't ready.");
075        }
076    }
077
078    @Override
079    public int getEnd() {
080        if (done) {
081            return cs.length();
082        } else {
083            throw new IllegalStateException("NonTokenizer isn't ready.");
084        }
085    }
086
087    @Override
088    public Token.TokenType getType() {
089        if (done) {
090            return Token.TokenType.WORD;
091        } else {
092            throw new IllegalStateException("NonTokenizer isn't ready.");
093        }
094    }
095
096    @Override
097    public NonTokenizer clone() {
098        try {
099            NonTokenizer copy = (NonTokenizer) super.clone();
100            copy.done = false;
101            copy.cs = null;
102            return copy;
103        } catch (CloneNotSupportedException e) {
104            throw new Error("Assertion error, NonTokenizer is Cloneable.");
105        }
106    }
107
108}