001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo.util.tokens.universal; 018 019import org.tribuo.util.tokens.Token; 020 021/** 022 * A range currently being segmented. 023 */ 024public final class Range implements CharSequence { 025 public char[] buff = new char[16]; 026 public int len; 027 public int start; 028 public int end; 029 public int incr; 030 public Token.TokenType type; 031 032 Range() {} 033 034 public void set(char c1, char c2, int start) { 035 buff[0] = c1; 036 buff[1] = c2; 037 this.start = start; 038 this.end = start + 2; 039 this.len = 2; 040 this.incr = 0; 041 this.type = Token.TokenType.NGRAM; 042 } 043 044 public void set(char c, int start) { 045 buff[0] = c; 046 this.start = start; 047 this.end = start + 1; 048 this.len = 1; 049 this.incr = 1; 050 this.type = Token.TokenType.WORD; 051 } 052 053 public void set(char[] buff, int len, int start) { 054 if (this.buff.length < buff.length) { 055 this.buff = new char[buff.length + 1]; 056 } 057 System.arraycopy(buff, 0, this.buff, 0, len); 058 this.len = len; 059 this.start = start; 060 this.end = start + len; 061 this.incr = 1; 062 this.type = Token.TokenType.WORD; 063 } 064 065 public void punct(char p, int start) { 066 buff[0] = p; 067 this.len = 1; 068 this.start = Math.max(start, 0); 069 this.end = this.start + 1; 070 this.incr = 0; 071 this.type = Token.TokenType.PUNCTUATION; 072 } 073 074 public void setType(Token.TokenType type) { 075 this.type = type; 076 } 077 078 @Override 079 public int length() { 080 return len; 081 } 082 083 @Override 084 public char charAt(int index) { 085 if (index < len) { 086 return buff[index]; 087 } 088 throw new IndexOutOfBoundsException(String.format("index %d exceeds length %d", index, len)); 089 } 090 091 @Override 092 public CharSequence subSequence(int start, int end) { 093 Range r = new Range(); 094 System.arraycopy(buff, start, r.buff, 0, end - start); 095 r.start = 0; 096 r.len = end - start; 097 r.end = r.len; 098 return r; 099 } 100 101 @Override 102 public String toString() { 103 return new String(buff, 0, len) + " " + type + " " + start + " " + end; 104 } 105}