001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.util.tokens.universal;
018
019import org.tribuo.util.tokens.Token;
020
021/**
022 * A range currently being segmented.
023 */
024public final class Range implements CharSequence {
025    public char[] buff = new char[16];
026    public int len;
027    public int start;
028    public int end;
029    public int incr;
030    public Token.TokenType type;
031
032    Range() {}
033
034    public void set(char c1, char c2, int start) {
035        buff[0] = c1;
036        buff[1] = c2;
037        this.start = start;
038        this.end = start + 2;
039        this.len = 2;
040        this.incr = 0;
041        this.type = Token.TokenType.NGRAM;
042    }
043
044    public void set(char c, int start) {
045        buff[0] = c;
046        this.start = start;
047        this.end = start + 1;
048        this.len = 1;
049        this.incr = 1;
050        this.type = Token.TokenType.WORD;
051    }
052
053    public void set(char[] buff, int len, int start) {
054        if (this.buff.length < buff.length) {
055            this.buff = new char[buff.length + 1];
056        }
057        System.arraycopy(buff, 0, this.buff, 0, len);
058        this.len = len;
059        this.start = start;
060        this.end = start + len;
061        this.incr = 1;
062        this.type = Token.TokenType.WORD;
063    }
064
065    public void punct(char p, int start) {
066        buff[0] = p;
067        this.len = 1;
068        this.start = Math.max(start, 0);
069        this.end = this.start + 1;
070        this.incr = 0;
071        this.type = Token.TokenType.PUNCTUATION;
072    }
073
074    public void setType(Token.TokenType type) {
075        this.type = type;
076    }
077
078    @Override
079    public int length() {
080        return len;
081    }
082
083    @Override
084    public char charAt(int index) {
085        if (index < len) {
086            return buff[index];
087        }
088        throw new IndexOutOfBoundsException(String.format("index %d exceeds length %d", index, len));
089    }
090
091    @Override
092    public CharSequence subSequence(int start, int end) {
093        Range r = new Range();
094        System.arraycopy(buff, start, r.buff, 0, end - start);
095        r.start = 0;
096        r.len = end - start;
097        r.end = r.len;
098        return r;
099    }
100
101    @Override
102    public String toString() {
103        return new String(buff, 0, len) + " " + type + " " + start + " " + end;
104    }
105}