View Javadoc
1   /*
2    * (c) Copyright 2006-2020 by rapiddweller GmbH & Volker Bergmann. All rights reserved.
3    *
4    * Redistribution and use in source and binary forms, with or without
5    * modification, is permitted under the terms of the
6    * GNU General Public License.
7    *
8    * For redistributing this software or a derivative work under a license other
9    * than the GPL-compatible Free Software License as defined by the Free
10   * Software Foundation or approved by OSI, you must first obtain a commercial
11   * license to this software product from rapiddweller GmbH & Volker Bergmann.
12   *
13   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14   * WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
15   * REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
16   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
17   * HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24   * POSSIBILITY OF SUCH DAMAGE.
25   */
26  
27  package com.rapiddweller.platform.csv;
28  
29  import com.rapiddweller.common.ArrayUtil;
30  import com.rapiddweller.common.Converter;
31  import com.rapiddweller.common.IOUtil;
32  import com.rapiddweller.common.Patterns;
33  import com.rapiddweller.common.StringUtil;
34  import com.rapiddweller.common.Tabular;
35  import com.rapiddweller.common.converter.ArrayConverter;
36  import com.rapiddweller.common.converter.ConverterChain;
37  import com.rapiddweller.format.DataContainer;
38  import com.rapiddweller.format.DataIterator;
39  import com.rapiddweller.format.csv.CSVLineIterator;
40  import com.rapiddweller.format.util.ConvertingDataIterator;
41  import com.rapiddweller.format.util.OrthogonalArrayIterator;
42  import com.rapiddweller.model.data.ComplexTypeDescriptor;
43  import com.rapiddweller.model.data.Entity;
44  import com.rapiddweller.platform.array.Array2EntityConverter;
45  
46  import java.io.FileNotFoundException;
47  import java.io.IOException;
48  import java.util.ArrayList;
49  import java.util.List;
50  
51  /**
52   * Iterates Entities in a CSV file.
53   * When the property 'columns' is set, the CSV file is assumed to have no header row.<br/>
54   * <br/>
55   * Created: 07.04.2008 09:49:08
56   *
57   * @author Volker Bergmann
58   * @since 0.5.1
59   */
60  public class CSVEntityIterator implements DataIterator<Entity>, Tabular {
61  
62    private final String uri;
63    private final char separator;
64    private final String encoding;
65    private String[] columns;
66    private final Converter<String, ?> preprocessor;
67    private boolean expectingHeader;
68    private boolean rowBased;
69  
70    private DataIterator<Entity> source;
71  
72    private boolean initialized;
73    private final ComplexTypeDescriptor entityDescriptor;
74  
75    // constructors ----------------------------------------------------------------------------------------------------
76  
77    /**
78     * Instantiates a new Csv entity iterator.
79     *
80     * @param uri          the uri
81     * @param descriptor   the descriptor
82     * @param preprocessor the preprocessor
83     * @param separator    the separator
84     * @param encoding     the encoding
85     * @throws FileNotFoundException the file not found exception
86     */
87    public CSVEntityIterator(String uri, ComplexTypeDescriptor descriptor,
88                             Converter<String, ?> preprocessor, char separator,
89                             String encoding) throws FileNotFoundException {
90      if (!IOUtil.isURIAvailable(uri)) {
91        throw new FileNotFoundException("URI not found: " + uri);
92      }
93      this.uri = uri;
94      this.preprocessor = preprocessor;
95      this.separator = separator;
96      this.encoding = encoding;
97      this.entityDescriptor = descriptor;
98      this.initialized = false;
99      this.expectingHeader = true;
100     this.rowBased = (descriptor != null && descriptor.isRowBased() != null ?
101         descriptor.isRowBased() : true);
102   }
103 
104   // properties ------------------------------------------------------------------------------------------------------
105 
106   /**
107    * Parse all list.
108    *
109    * @param uri          the uri
110    * @param separator    the separator
111    * @param encoding     the encoding
112    * @param descriptor   the descriptor
113    * @param preprocessor the preprocessor
114    * @param patterns     the patterns
115    * @return the list
116    * @throws FileNotFoundException the file not found exception
117    */
118   public static List<Entity> parseAll(String uri, char separator,
119                                       String encoding,
120                                       ComplexTypeDescriptor descriptor,
121                                       Converter<String, String> preprocessor,
122                                       Patterns patterns)
123       throws FileNotFoundException {
124     List<Entity> list = new ArrayList<>();
125     CSVEntityIterator iterator =
126         new CSVEntityIterator(uri, descriptor, preprocessor, separator,
127             encoding);
128     DataContainer<Entity> container = new DataContainer<>();
129     while ((container = iterator.next(container)) != null) {
130       list.add(container.getData());
131     }
132     return list;
133   }
134 
135   /**
136    * Sets expecting header.
137    *
138    * @param expectHeader the expect header
139    */
140   public void setExpectingHeader(boolean expectHeader) {
141     this.expectingHeader = expectHeader;
142   }
143 
144   /**
145    * Is row based boolean.
146    *
147    * @return the boolean
148    */
149   public boolean isRowBased() {
150     return rowBased;
151   }
152 
153   /**
154    * Sets row based.
155    *
156    * @param rowBased the row based
157    */
158   public void setRowBased(boolean rowBased) {
159     this.rowBased = rowBased;
160   }
161 
162   @Override
163   public String[] getColumnNames() {
164     return columns;
165   }
166 
167   // DataIterator interface ------------------------------------------------------------------------------------------
168 
169   /**
170    * Sets columns.
171    *
172    * @param columns the columns
173    */
174   public void setColumns(String[] columns) {
175     this.expectingHeader = false;
176     if (ArrayUtil.isEmpty(columns)) {
177       this.columns = null;
178     } else {
179       this.columns = columns.clone();
180       StringUtil.trimAll(this.columns);
181     }
182   }
183 
184   @Override
185   public Class<Entity> getType() {
186     return Entity.class;
187   }
188 
189   @Override
190   public DataContainer<Entity> next(DataContainer<Entity> container) {
191     assureInitialized();
192     return source.next(container);
193   }
194 
195   @Override
196   public void close() {
197     IOUtil.close(source);
198   }
199 
200   // java.lang.Object overrides --------------------------------------------------------------------------------------
201 
202   @Override
203   public String toString() {
204     return getClass().getSimpleName() + "[uri=" + uri + ", encoding=" +
205         encoding + ", separator=" + separator +
206         ", entityName=" + entityDescriptor.getName() + "]";
207   }
208 
209   // private helpers -------------------------------------------------------------------------------------------------
210 
211   private void assureInitialized() {
212     if (!initialized) {
213       init();
214       initialized = true;
215     }
216   }
217 
218   @SuppressWarnings({"unchecked", "rawtypes"})
219   private void init() {
220     try {
221       DataIterator<String[]> cellIterator;
222       cellIterator = new CSVLineIterator(uri, separator, true, encoding);
223       if (!rowBased) {
224         cellIterator = new OrthogonalArrayIterator<>(cellIterator);
225       }
226       if (expectingHeader) {
227         setColumns(cellIterator.next(new DataContainer<>()).getData());
228       }
229       Converter<String[], Object[]> arrayConverter =
230           new ArrayConverter(String.class, Object.class,
231               preprocessor);
232       Array2EntityConverter a2eConverter =
233           new Array2EntityConverter(entityDescriptor, columns, true);
234       Converter<String[], Entity> converter =
235           new ConverterChain<>(arrayConverter, a2eConverter);
236       this.source = new ConvertingDataIterator<>(cellIterator, converter);
237     } catch (IOException e) {
238       throw new RuntimeException("Error in processing " + uri, e);
239     }
240   }
241 
242 }