001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo.json; 018 019import com.fasterxml.jackson.core.JsonFactory; 020import com.fasterxml.jackson.core.JsonParser; 021import com.fasterxml.jackson.databind.JsonNode; 022import com.fasterxml.jackson.databind.ObjectMapper; 023import com.fasterxml.jackson.databind.node.ArrayNode; 024import com.fasterxml.jackson.databind.node.ObjectNode; 025import org.tribuo.data.columnar.ColumnarIterator; 026 027import java.io.IOException; 028import java.io.Reader; 029import java.net.URI; 030import java.nio.file.Files; 031import java.nio.file.Paths; 032import java.util.ArrayList; 033import java.util.Collections; 034import java.util.Iterator; 035import java.util.List; 036import java.util.Map; 037import java.util.Optional; 038import java.util.logging.Level; 039import java.util.logging.Logger; 040 041/** 042 * An iterator for JSON format files converting them into a format suitable for 043 * {@link org.tribuo.data.columnar.RowProcessor}. 044 */ 045public class JsonFileIterator extends ColumnarIterator implements AutoCloseable { 046 private static final Logger logger = Logger.getLogger(JsonFileIterator.class.getName()); 047 048 private final JsonParser parser; 049 private final Iterator<JsonNode> nodeIterator; 050 private int rowNum = 0; 051 052 /** 053 * Builds a JsonFileIterator for the supplied Reader. 054 * @param reader The source to read. 055 */ 056 public JsonFileIterator(Reader reader) { 057 JsonFactory jsonFactory = new JsonFactory(); 058 //noinspection OverlyBroadCatchBlock 059 try { 060 parser = jsonFactory.createParser(reader); 061 parser.setCodec(new ObjectMapper()); 062 JsonNode jsonNode = parser.readValueAsTree(); 063 if (jsonNode.isArray()) { 064 ArrayNode node = (ArrayNode) jsonNode; 065 nodeIterator = node.elements(); 066 if (nodeIterator.hasNext()) { 067 JsonNode curNode = nodeIterator.next(); 068 if (curNode instanceof ObjectNode) { 069 Map<String, String> curEntry = JsonUtil.convertToMap((ObjectNode)curNode); 070 List<String> headerList = new ArrayList<>(curEntry.keySet()); 071 Collections.sort(headerList); 072 fields = headerList; 073 currentRow = Optional.of(new Row(rowNum, fields, curEntry)); 074 rowNum++; 075 } else { 076 throw new IllegalStateException("Expected an array of JSON objects but found '" + curNode.asText() + "'"); 077 } 078 } else { 079 throw new IllegalStateException("No elements found in JSON array"); 080 } 081 } else { 082 throw new IllegalStateException("JSON array not found when reading file"); 083 } 084 } catch (IOException e) { 085 throw new IllegalStateException("Error reading json file caused by: " + e.getMessage()); 086 } 087 } 088 089 /** 090 * Builds a CSVIterator for the supplied URI. 091 * @param dataFile The source to read. 092 * @throws IOException thrown if the file is not readable in some way. 093 */ 094 public JsonFileIterator(URI dataFile) throws IOException { 095 this(Files.newBufferedReader(Paths.get(dataFile))); 096 } 097 098 @Override 099 protected Optional<Row> getRow() { 100 // row is initially populated in the constructor 101 if (nodeIterator.hasNext()) { 102 JsonNode next = nodeIterator.next(); 103 if (next instanceof ObjectNode) { 104 Row row = new Row(rowNum, fields, JsonUtil.convertToMap((ObjectNode)next)); 105 rowNum++; 106 return Optional.of(row); 107 } else { 108 logger.warning("Unexpected node found, expected ObjectNode, found '" + next.asText() + '"'); 109 return Optional.empty(); 110 } 111 } else { 112 try { 113 parser.close(); 114 } catch (IOException e) { 115 logger.log(Level.WARNING, "Error closing reader at end of file", e); 116 } 117 return Optional.empty(); 118 } 119 } 120 121 @Override 122 public void close() throws IOException { 123 parser.close(); 124 } 125}