001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.json;
018
019import com.fasterxml.jackson.core.JsonFactory;
020import com.fasterxml.jackson.core.JsonParser;
021import com.fasterxml.jackson.databind.JsonNode;
022import com.fasterxml.jackson.databind.ObjectMapper;
023import com.fasterxml.jackson.databind.node.ArrayNode;
024import com.fasterxml.jackson.databind.node.ObjectNode;
025import org.tribuo.data.columnar.ColumnarIterator;
026
027import java.io.IOException;
028import java.io.Reader;
029import java.net.URI;
030import java.nio.file.Files;
031import java.nio.file.Paths;
032import java.util.ArrayList;
033import java.util.Collections;
034import java.util.Iterator;
035import java.util.List;
036import java.util.Map;
037import java.util.Optional;
038import java.util.logging.Level;
039import java.util.logging.Logger;
040
041/**
042 * An iterator for JSON format files converting them into a format suitable for
043 * {@link org.tribuo.data.columnar.RowProcessor}.
044 */
045public class JsonFileIterator extends ColumnarIterator implements AutoCloseable {
046    private static final Logger logger = Logger.getLogger(JsonFileIterator.class.getName());
047
048    private final JsonParser parser;
049    private final Iterator<JsonNode> nodeIterator;
050    private int rowNum = 0;
051
052    /**
053     * Builds a JsonFileIterator for the supplied Reader.
054     * @param reader The source to read.
055     */
056    public JsonFileIterator(Reader reader) {
057        JsonFactory jsonFactory = new JsonFactory();
058        //noinspection OverlyBroadCatchBlock
059        try {
060            parser = jsonFactory.createParser(reader);
061            parser.setCodec(new ObjectMapper());
062            JsonNode jsonNode = parser.readValueAsTree();
063            if (jsonNode.isArray()) {
064                ArrayNode node = (ArrayNode) jsonNode;
065                nodeIterator = node.elements();
066                if (nodeIterator.hasNext()) {
067                    JsonNode curNode = nodeIterator.next();
068                    if (curNode instanceof ObjectNode) {
069                        Map<String, String> curEntry = JsonUtil.convertToMap((ObjectNode)curNode);
070                        List<String> headerList = new ArrayList<>(curEntry.keySet());
071                        Collections.sort(headerList);
072                        fields = headerList;
073                        currentRow = Optional.of(new Row(rowNum, fields, curEntry));
074                        rowNum++;
075                    } else {
076                        throw new IllegalStateException("Expected an array of JSON objects but found '" + curNode.asText() + "'");
077                    }
078                } else {
079                    throw new IllegalStateException("No elements found in JSON array");
080                }
081            } else {
082                throw new IllegalStateException("JSON array not found when reading file");
083            }
084        } catch (IOException e) {
085            throw new IllegalStateException("Error reading json file caused by: " + e.getMessage());
086        }
087    }
088
089    /**
090     * Builds a CSVIterator for the supplied URI.
091     * @param dataFile The source to read.
092     * @throws IOException thrown if the file is not readable in some way.
093     */
094    public JsonFileIterator(URI dataFile) throws IOException {
095        this(Files.newBufferedReader(Paths.get(dataFile)));
096    }
097
098    @Override
099    protected Optional<Row> getRow() {
100        // row is initially populated in the constructor
101        if (nodeIterator.hasNext()) {
102            JsonNode next = nodeIterator.next();
103            if (next instanceof ObjectNode) {
104                Row row = new Row(rowNum, fields, JsonUtil.convertToMap((ObjectNode)next));
105                rowNum++;
106                return Optional.of(row);
107            } else {
108                logger.warning("Unexpected node found, expected ObjectNode, found '" + next.asText() + '"');
109                return Optional.empty();
110            }
111        } else {
112            try {
113                parser.close();
114            } catch (IOException e) {
115                logger.log(Level.WARNING, "Error closing reader at end of file", e);
116            }
117            return Optional.empty();
118        }
119    }
120
121    @Override
122    public void close() throws IOException {
123        parser.close();
124    }
125}