View Javadoc
1   /*
2    * (c) Copyright 2006-2020 by rapiddweller GmbH & Volker Bergmann. All rights reserved.
3    *
4    * Redistribution and use in source and binary forms, with or without
5    * modification, is permitted under the terms of the
6    * GNU General Public License.
7    *
8    * For redistributing this software or a derivative work under a license other
9    * than the GPL-compatible Free Software License as defined by the Free
10   * Software Foundation or approved by OSI, you must first obtain a commercial
11   * license to this software product from rapiddweller GmbH & Volker Bergmann.
12   *
13   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14   * WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
15   * REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
16   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
17   * HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24   * POSSIBILITY OF SUCH DAMAGE.
25   */
26  
27  package com.rapiddweller.benerator.csv;
28  
29  import com.rapiddweller.benerator.dataset.DatasetUtil;
30  import com.rapiddweller.common.ConfigurationError;
31  import com.rapiddweller.common.Converter;
32  import com.rapiddweller.format.DataContainer;
33  import com.rapiddweller.format.csv.CSVLineIterator;
34  import com.rapiddweller.script.WeightedSample;
35  
36  import java.io.IOException;
37  import java.util.ArrayList;
38  import java.util.List;
39  
40  /**
41   * Provides CSV-related utility methods.<br/><br/>
42   * Created: 17.02.2010 23:20:35
43   *
44   * @author Volker Bergmann
45   * @since 0.6.0
46   */
47  public class CSVGeneratorUtil {
48  
49    /**
50     * Parse dataset files list.
51     *
52     * @param <T>             the type parameter
53     * @param datasetName     the dataset name
54     * @param separator       the separator
55     * @param nesting         the nesting
56     * @param filenamePattern the filename pattern
57     * @param encoding        the encoding
58     * @param converter       the converter
59     * @return the list
60     */
61    public static <T> List<WeightedSample<T>> parseDatasetFiles(
62        String datasetName, char separator, String nesting, String filenamePattern,
63        String encoding, Converter<String, T> converter) {
64      String[] dataFilenames;
65      if (nesting == null || datasetName == null) {
66        dataFilenames = new String[] {filenamePattern};
67      } else {
68        dataFilenames = DatasetUtil.getDataFiles(filenamePattern, datasetName, nesting);
69      }
70      List<WeightedSample<T>> samples = new ArrayList<>();
71      for (String dataFilename : dataFilenames) {
72        parseFile(dataFilename, separator, encoding, converter, samples);
73      }
74      return samples;
75    }
76  
77    /**
78     * Parse file list.
79     *
80     * @param <T>       the type parameter
81     * @param filename  the filename
82     * @param separator the separator
83     * @param encoding  the encoding
84     * @param converter the converter
85     * @return the list
86     */
87    public static <T> List<WeightedSample<T>> parseFile(String filename, char separator, String encoding,
88                                                        Converter<String, T> converter) {
89      return parseFile(filename, separator, encoding, converter, new ArrayList<>());
90    }
91  
92    /**
93     * Parse file list.
94     *
95     * @param <T>       the type parameter
96     * @param filename  the filename
97     * @param separator the separator
98     * @param encoding  the encoding
99     * @param converter the converter
100    * @param samples   the samples
101    * @return the list
102    */
103   public static <T> List<WeightedSample<T>> parseFile(String filename, char separator, String encoding,
104                                                       Converter<String, T> converter, List<WeightedSample<T>> samples) {
105     try {
106       CSVLineIterator iterator = new CSVLineIterator(filename, separator, encoding);
107       DataContainer<String[]> container = new DataContainer<>();
108       while ((container = iterator.next(container)) != null) {
109         String[] tokens = container.getData();
110         if (tokens.length == 0) {
111           continue;
112         }
113         double weight = (tokens.length < 2 || tokens[1] == null || tokens[1].trim().length() == 0 ? 1. : Double.parseDouble(tokens[1].trim()));
114         T value = converter.convert(tokens[0]);
115         WeightedSample<T> sample = new WeightedSample<>(value, weight);
116         samples.add(sample);
117       }
118       return samples;
119     } catch (IOException e) {
120       throw new ConfigurationError(e);
121     }
122   }
123 
124 }