1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package com.rapiddweller.benerator.util;
28
29 import com.rapiddweller.common.IOUtil;
30 import com.rapiddweller.common.ReaderLineIterator;
31 import com.rapiddweller.common.StringUtil;
32 import com.rapiddweller.common.ui.ConsoleInfoPrinter;
33 import org.apache.logging.log4j.LogManager;
34 import org.apache.logging.log4j.Logger;
35
36 import java.io.BufferedReader;
37 import java.io.BufferedWriter;
38 import java.io.FileWriter;
39 import java.io.IOException;
40 import java.io.PrintWriter;
41 import java.util.ArrayList;
42 import java.util.List;
43
44
45
46
47
48
49 public class LineShuffler {
50
51
52
53
54 public static final Logger logger = LogManager.getLogger(LineShuffler.class);
55
56
57
58
59
60
61
62 public static void main(String[] args) throws IOException {
63 if (args.length < 2) {
64 printHelp();
65 System.exit(-1);
66 }
67 String inFilename = args[0];
68 String outFilename = args[1];
69 int bufferSize = (args.length > 2 ? Integer.parseInt(args[2]) : 100000);
70 shuffle(inFilename, outFilename, bufferSize);
71 }
72
73
74
75
76
77
78
79
80
81 public static void shuffle(String inFilename, String outFilename, int bufferSize) throws IOException {
82 logger.info("shuffling " + inFilename + " and writing to " + outFilename + " (max. " + bufferSize + " lines)");
83 ReaderLineIterator iterator = new ReaderLineIterator(new BufferedReader(IOUtil.getReaderForURI(inFilename)));
84 List<String> lines = read(bufferSize, iterator);
85 shuffle(lines);
86 save(lines, outFilename);
87 }
88
89
90
91
92
93
94 public static void shuffle(List<String> lines) {
95 int size = lines.size();
96
97 int iterations = size / 2;
98 for (int i = 0; i < iterations; i++) {
99 int i1 = RandomUtil.randomInt(0, size - 1);
100 int i2;
101 do {
102 i2 = RandomUtil.randomInt(0, size - 1);
103 } while (i1 == i2);
104 String tmp = lines.get(i1);
105 lines.set(i1, lines.get(i2));
106 lines.set(i2, tmp);
107 }
108 }
109
110
111
112 private static List<String> read(int bufferSize, ReaderLineIterator iterator) {
113 List<String> lines = new ArrayList<>(Math.max(100000, bufferSize));
114 int lineCount = 0;
115 while (iterator.hasNext() && lineCount < bufferSize) {
116 String line = iterator.next();
117 if (!StringUtil.isEmpty(line)) {
118 lines.add(line);
119 lineCount++;
120 if (lineCount % 100000 == 99999) {
121 logger.info("parsed " + lineCount + " lines");
122 }
123 }
124 }
125 return lines;
126 }
127
128 private static void save(List<String> lines, String outputFilename) throws IOException {
129 logger.info("saving " + outputFilename + "...");
130 PrintWriter printer = new PrintWriter(new BufferedWriter(new FileWriter(outputFilename)));
131 try {
132 for (String line : lines) {
133 printer.println(line);
134 }
135 } finally {
136 IOUtil.close(printer);
137 }
138 }
139
140 private static void printHelp() {
141 ConsoleInfoPrinter.printHelp("Parameters: inFile outFile [buffer size]");
142 }
143 }