/*
 * (c) Copyright 2006-2020 by rapiddweller GmbH & Volker Bergmann. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, is permitted under the terms of the
 * GNU General Public License.
 *
 * For redistributing this software or a derivative work under a license other
 * than the GPL-compatible Free Software License as defined by the Free
 * Software Foundation or approved by OSI, you must first obtain a commercial
 * license to this software product from rapiddweller GmbH & Volker Bergmann.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
 * REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
 * HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
26  
27  package com.rapiddweller.benerator.util;
28  
29  import com.rapiddweller.common.IOUtil;
30  import com.rapiddweller.common.ReaderLineIterator;
31  import com.rapiddweller.common.StringUtil;
32  import com.rapiddweller.common.ui.ConsoleInfoPrinter;
33  import org.apache.logging.log4j.LogManager;
34  import org.apache.logging.log4j.Logger;
35  
36  import java.io.BufferedReader;
37  import java.io.BufferedWriter;
38  import java.io.FileWriter;
39  import java.io.IOException;
40  import java.io.PrintWriter;
41  import java.util.ArrayList;
42  import java.util.List;
43  
44  /**
45   * Reads a text file, shuffles its lines and writes it to another file.<br/>
46   * <br/>
47   * Created: 16.07.2007 20:29:10
48   */
49  public class LineShuffler {
50  
51    /**
52     * The constant logger.
53     */
54    public static final Logger logger = LogManager.getLogger(LineShuffler.class);
55  
56    /**
57     * The entry point of application.
58     *
59     * @param args the input arguments
60     * @throws IOException the io exception
61     */
62    public static void main(String[] args) throws IOException {
63      if (args.length < 2) {
64        printHelp();
65        System.exit(-1);
66      }
67      String inFilename = args[0];
68      String outFilename = args[1];
69      int bufferSize = (args.length > 2 ? Integer.parseInt(args[2]) : 100000);
70      shuffle(inFilename, outFilename, bufferSize);
71    }
72  
73    /**
74     * Shuffle.
75     *
76     * @param inFilename  the in filename
77     * @param outFilename the out filename
78     * @param bufferSize  the buffer size
79     * @throws IOException the io exception
80     */
81    public static void shuffle(String inFilename, String outFilename, int bufferSize) throws IOException {
82      logger.info("shuffling " + inFilename + " and writing to " + outFilename + " (max. " + bufferSize + " lines)");
83      ReaderLineIterator iterator = new ReaderLineIterator(new BufferedReader(IOUtil.getReaderForURI(inFilename)));
84      List<String> lines = read(bufferSize, iterator);
85      shuffle(lines);
86      save(lines, outFilename);
87    }
88  
89    /**
90     * Shuffle.
91     *
92     * @param lines the lines
93     */
94    public static void shuffle(List<String> lines) {
95      int size = lines.size();
96      //Generator<Integer> indexGenerator = new IntegerGenerator(0, size - 1, 1, Sequence.RANDOM);
97      int iterations = size / 2;
98      for (int i = 0; i < iterations; i++) {
99        int i1 = RandomUtil.randomInt(0, size - 1);
100       int i2;
101       do {
102         i2 = RandomUtil.randomInt(0, size - 1);
103       } while (i1 == i2);
104       String tmp = lines.get(i1);
105       lines.set(i1, lines.get(i2));
106       lines.set(i2, tmp);
107     }
108   }
109 
110   // private helpers -------------------------------------------------------------------------------------------------
111 
112   private static List<String> read(int bufferSize, ReaderLineIterator iterator) {
113     List<String> lines = new ArrayList<>(Math.max(100000, bufferSize));
114     int lineCount = 0;
115     while (iterator.hasNext() && lineCount < bufferSize) {
116       String line = iterator.next();
117       if (!StringUtil.isEmpty(line)) {
118         lines.add(line);
119         lineCount++;
120         if (lineCount % 100000 == 99999) {
121           logger.info("parsed " + lineCount + " lines");
122         }
123       }
124     }
125     return lines;
126   }
127 
128   private static void save(List<String> lines, String outputFilename) throws IOException {
129     logger.info("saving " + outputFilename + "...");
130     PrintWriter printer = new PrintWriter(new BufferedWriter(new FileWriter(outputFilename)));
131     try {
132       for (String line : lines) {
133         printer.println(line);
134       }
135     } finally {
136       IOUtil.close(printer);
137     }
138   }
139 
140   private static void printHelp() {
141     ConsoleInfoPrinter.printHelp("Parameters: inFile outFile [buffer size]");
142   }
143 }