View Javadoc
1   /*
2    * (c) Copyright 2006-2020 by rapiddweller GmbH & Volker Bergmann. All rights reserved.
3    *
4    * Redistribution and use in source and binary forms, with or without
5    * modification, is permitted under the terms of the
6    * GNU General Public License.
7    *
8    * For redistributing this software or a derivative work under a license other
9    * than the GPL-compatible Free Software License as defined by the Free
10   * Software Foundation or approved by OSI, you must first obtain a commercial
11   * license to this software product from rapiddweller GmbH & Volker Bergmann.
12   *
13   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14   * WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
15   * REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
16   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
17   * HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24   * POSSIBILITY OF SUCH DAMAGE.
25   */
26  
27  package com.rapiddweller.platform.xls;
28  
29  import com.rapiddweller.benerator.engine.BeneratorContext;
30  import com.rapiddweller.common.ArrayUtil;
31  import com.rapiddweller.common.CollectionUtil;
32  import com.rapiddweller.common.ConfigurationError;
33  import com.rapiddweller.common.Converter;
34  import com.rapiddweller.common.IOUtil;
35  import com.rapiddweller.common.StringUtil;
36  import com.rapiddweller.common.converter.ConverterManager;
37  import com.rapiddweller.common.converter.ToStringConverter;
38  import com.rapiddweller.format.DataContainer;
39  import com.rapiddweller.format.DataIterator;
40  import com.rapiddweller.format.util.OrthogonalArrayIterator;
41  import com.rapiddweller.format.util.ThreadLocalDataContainer;
42  import com.rapiddweller.format.xls.XLSLineIterator;
43  import com.rapiddweller.model.data.ComplexTypeDescriptor;
44  import com.rapiddweller.model.data.ComponentDescriptor;
45  import com.rapiddweller.model.data.DataModel;
46  import com.rapiddweller.model.data.Entity;
47  import com.rapiddweller.platform.array.Array2EntityConverter;
48  import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
49  import org.apache.poi.ss.usermodel.Sheet;
50  import org.apache.poi.ss.usermodel.Workbook;
51  import org.apache.poi.ss.usermodel.WorkbookFactory;
52  
53  import java.io.IOException;
54  import java.util.ArrayList;
55  import java.util.List;
56  
57  /**
58   * Iterates a single sheet of an XLS document and maps its rows to entities.<br/><br/>
59   * Created: 23.06.2014 17:20:19
60   *
61   * @author Volker Bergmann
62   * @since 0.9.5
63   */
64  public class SingleSheetXLSEntityIterator implements DataIterator<Entity> {
65  
66    private String uri;
67    private final Workbook workbook;
68    private final boolean rowBased;
69    private final boolean formatted;
70    private final String emptyMarker;
71    private DataIterator<Object[]> source;
72    private final Converter<String, ?> preprocessor;
73    private Array2EntityConverter converter;
74    private Object[] buffer;
75    private final ThreadLocalDataContainer<Object[]> sourceContainer =
76        new ThreadLocalDataContainer<>();
77    private ComplexTypeDescriptor entityDescriptor;
78    private final BeneratorContext context;
79    private String[] headers;
80  
81  
82    // constructors ----------------------------------------------------------------------------------------------------
83  
84    /**
85     * Instantiates a new Single sheet xls entity iterator.
86     *
87     * @param uri          the uri
88     * @param sheetName    the sheet name
89     * @param preprocessor the preprocessor
90     * @param entityType   the entity type
91     * @param context      the context
92     * @param rowBased     the row based
93     * @param formatted    the formatted
94     * @param emptyMarker  the empty marker
95     * @throws InvalidFormatException the invalid format exception
96     * @throws IOException            the io exception
97     */
98    public SingleSheetXLSEntityIterator(String uri, String sheetName,
99                                        Converter<String, ?> preprocessor,
100                                       ComplexTypeDescriptor entityType,
101                                       BeneratorContext context,
102                                       boolean rowBased, boolean formatted,
103                                       String emptyMarker)
104       throws InvalidFormatException, IOException {
105     this(loadSheet(uri, sheetName), preprocessor, entityType, context,
106         rowBased, formatted, emptyMarker);
107     this.uri = uri;
108   }
109 
110   /**
111    * Instantiates a new Single sheet xls entity iterator.
112    *
113    * @param sheet            the sheet
114    * @param preprocessor     the preprocessor
115    * @param entityDescriptor the entity descriptor
116    * @param context          the context
117    * @param rowBased         the row based
118    * @param formatted        the formatted
119    * @param emptyMarker      the empty marker
120    */
121   public SingleSheetXLSEntityIterator(Sheet sheet,
122                                       Converter<String, ?> preprocessor,
123                                       ComplexTypeDescriptor entityDescriptor,
124                                       BeneratorContext context,
125                                       boolean rowBased, boolean formatted,
126                                       String emptyMarker) {
127     this.workbook = sheet.getWorkbook();
128     this.preprocessor = preprocessor;
129     this.context = context;
130     this.rowBased = rowBased;
131     this.formatted = formatted;
132     this.emptyMarker = emptyMarker;
133     this.source = createRawIterator(sheet, rowBased, preprocessor);
134 
135     // if not specified explicitly, determine entity type by sheet name
136     this.entityDescriptor = entityDescriptor;
137     if (this.entityDescriptor == null) {
138       String entityTypeName = sheet.getSheetName();
139       if (context != null) {
140         DataModel dataModel = context.getDataModel();
141         this.entityDescriptor = (ComplexTypeDescriptor) dataModel
142             .getTypeDescriptor(entityTypeName);
143         if (this.entityDescriptor != null) {
144           this.entityDescriptor =
145               new ComplexTypeDescriptor(entityTypeName + "_",
146                   context.getLocalDescriptorProvider());
147         } else {
148           this.entityDescriptor = createDescriptor(entityTypeName);
149         }
150       } else {
151         this.entityDescriptor = createDescriptor(entityTypeName);
152       }
153     }
154 
155     // parse headers
156     parseHeaders();
157     if (headers == null) {
158       this.source = null; // empty sheet
159       return;
160     }
161 
162     // parse first data row
163     DataContainer<Object[]> tmp = this.source.next(sourceContainer.get());
164     if (tmp == null) {
165       this.source = null; // no data in sheet
166       return;
167     }
168     this.buffer = tmp.getData();
169     converter = new Array2EntityConverter(this.entityDescriptor, headers,
170         false);
171   }
172 
173 
174   // DataIterator interface implementation ---------------------------------------------------------------------------
175 
176   /**
177    * Parse all list.
178    *
179    * @param uri          the uri
180    * @param sheetName    the sheet name
181    * @param preprocessor the preprocessor
182    * @param type         the type
183    * @param context      the context
184    * @param rowBased     the row based
185    * @param formatted    the formatted
186    * @param emptyMarker  the empty marker
187    * @return the list
188    * @throws IOException            the io exception
189    * @throws InvalidFormatException the invalid format exception
190    */
191   public static List<Entity> parseAll(String uri, String sheetName,
192                                       Converter<String, ?> preprocessor,
193                                       ComplexTypeDescriptor type,
194                                       BeneratorContext context,
195                                       boolean rowBased, boolean formatted,
196                                       String emptyMarker)
197       throws IOException, InvalidFormatException {
198     Sheet sheet = loadSheet(uri, sheetName);
199     return parseAll(sheet, preprocessor, type, context, rowBased, formatted,
200         emptyMarker);
201   }
202 
203   /**
204    * Parse all list.
205    *
206    * @param sheet        the sheet
207    * @param preprocessor the preprocessor
208    * @param type         the type
209    * @param context      the context
210    * @param rowBased     the row based
211    * @param formatted    the formatted
212    * @param emptyMarker  the empty marker
213    * @return the list
214    */
215   public static List<Entity> parseAll(Sheet sheet,
216                                       Converter<String, ?> preprocessor,
217                                       ComplexTypeDescriptor type,
218                                       BeneratorContext context,
219                                       boolean rowBased, boolean formatted,
220                                       String emptyMarker) {
221     List<Entity> list = new ArrayList<>();
222     SingleSheetXLSEntityIterator iterator =
223         new SingleSheetXLSEntityIterator(sheet, preprocessor, type,
224             context, rowBased, formatted, emptyMarker);
225     DataContainer<Entity> container = new DataContainer<>();
226     while ((container = iterator.next(container)) != null) {
227       list.add(container.getData());
228     }
229     return list;
230   }
231 
232   private static Sheet loadSheet(String uri, String sheetName)
233       throws IOException {
234     Workbook workbook =
235         WorkbookFactory.create(IOUtil.getInputStreamForURI(uri));
236     Sheet sheet = workbook.getSheet(sheetName);
237     if (sheet == null) {
238       throw new ConfigurationError(
239           "Sheet '" + sheetName + "' not found in file " + uri);
240     }
241     return sheet;
242   }
243 
244 
245   // convenience methods ---------------------------------------------------------------------------------------------
246 
247   private static String[] normalizeHeaders(Object[] rawHeaders) {
248     String[] headers = (String[]) ConverterManager
249         .convertAll(rawHeaders, new ToStringConverter(), String.class);
250     StringUtil.trimAll(headers);
251     int lastNonEmptyIndex = headers.length - 1;
252     while (lastNonEmptyIndex >= 0 &&
253         StringUtil.isEmpty(headers[lastNonEmptyIndex])) {
254       lastNonEmptyIndex--;
255     }
256     if (lastNonEmptyIndex < headers.length - 1) {
257       headers = ArrayUtil.copyOfRange(headers, 0, lastNonEmptyIndex + 1);
258     }
259     return headers;
260   }
261 
262   @Override
263   public Class<Entity> getType() {
264     return Entity.class;
265   }
266 
267 
268   // private helper methods --------------------------------------------------
269 
270   @Override
271   public DataContainer<Entity> next(DataContainer<Entity> container) {
272     if (source == null) {
273       return null;
274     }
275     Object[] rawData;
276     if (buffer != null) {
277       rawData = buffer;
278       buffer = null;
279     } else {
280       DataContainer<Object[]> tmp = source.next(sourceContainer.get());
281       if (tmp == null) {
282         return null;
283       }
284       rawData = tmp.getData();
285     }
286     resolveCollections(rawData);
287     Entity result = converter.convert(rawData);
288     return container.setData(result);
289   }
290 
291   @Override
292   public void close() {
293     IOUtil.close(source);
294   }
295 
296   private void parseHeaders() {
297     DataContainer<Object[]> tmp = this.source.next(sourceContainer.get());
298     this.headers = (tmp != null ? normalizeHeaders(tmp.getData()) : null);
299   }
300 
301   private DataIterator<Object[]> createRawIterator(Sheet sheet,
302                                                    boolean rowBased,
303                                                    Converter<String, ?> preprocessor) {
304     XLSLineIterator iterator =
305         new XLSLineIterator(sheet, false, formatted, preprocessor);
306     if (emptyMarker != null) {
307       iterator.setEmptyMarker(emptyMarker);
308     }
309     if (!rowBased) {
310       return new OrthogonalArrayIterator<>(iterator);
311     }
312     return iterator;
313   }
314 
315   private ComplexTypeDescriptor createDescriptor(String entityTypeName) {
316     ComplexTypeDescriptor descriptor;
317     descriptor = new ComplexTypeDescriptor(entityTypeName,
318         context.getLocalDescriptorProvider());
319     context.addLocalType(descriptor);
320     return descriptor;
321   }
322 
323   private void resolveCollections(Object[] rawData) {
324     String colRefPrefix = PlatformDescriptor.getCollectionReferencePrefix();
325     for (int i = 0; i < rawData.length; i++) {
326       Object cellValue = rawData[i];
327       if (cellValue instanceof String &&
328           ((String) cellValue).startsWith(colRefPrefix)) {
329         String tabName =
330             ((String) cellValue).substring(colRefPrefix.length());
331         ComponentDescriptor component =
332             entityDescriptor.getComponent(headers[i]);
333         ComplexTypeDescriptor componentType = (component != null ?
334             (ComplexTypeDescriptor) component.getTypeDescriptor() :
335             null);
336         rawData[i] = mapTabToArray(tabName, componentType);
337       }
338     }
339   }
340 
341   private Entity[] mapTabToArray(String tabName, ComplexTypeDescriptor type) {
342     Sheet sheet = getSheet(tabName);
343     List<Entity> elements =
344         parseAll(sheet, preprocessor, type, context, rowBased,
345             formatted, emptyMarker);
346     return CollectionUtil.toArray(elements, Entity.class);
347   }
348 
349   private Sheet getSheet(String tabName) {
350     for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
351       Sheet candidate = workbook.getSheetAt(i);
352       if (candidate.getSheetName().trim()
353           .equalsIgnoreCase(tabName.trim())) {
354         return candidate;
355       }
356     }
357     // tab not found
358     throw new ConfigurationError("Tab '" + tabName + "' not found" +
359         (uri != null ? " in " + uri : ""));
360   }
361 
362 
363   // java.lang.Object overrides --------------------------------------------------------------------------------------
364 
365   @Override
366   public String toString() {
367     return getClass().getSimpleName() + "[" + source + "]";
368   }
369 
370 }