View Javadoc
1   /*
2    * (c) Copyright 2006-2020 by rapiddweller GmbH & Volker Bergmann. All rights reserved.
3    *
4    * Redistribution and use in source and binary forms, with or without
5    * modification, is permitted under the terms of the
6    * GNU General Public License.
7    *
8    * For redistributing this software or a derivative work under a license other
9    * than the GPL-compatible Free Software License as defined by the Free
10   * Software Foundation or approved by OSI, you must first obtain a commercial
11   * license to this software product from rapiddweller GmbH & Volker Bergmann.
12   *
13   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14   * WITHOUT A WARRANTY OF ANY KIND. ALL EXPRESS OR IMPLIED CONDITIONS,
15   * REPRESENTATIONS AND WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF
16   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE
17   * HEREBY EXCLUDED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18   * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20   * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21   * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22   * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23   * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24   * POSSIBILITY OF SUCH DAMAGE.
25   */
26  
27  package com.rapiddweller.domain.organization;
28  
29  import com.rapiddweller.benerator.Generator;
30  import com.rapiddweller.benerator.GeneratorContext;
31  import com.rapiddweller.benerator.NonNullGenerator;
32  import com.rapiddweller.benerator.WeightedGenerator;
33  import com.rapiddweller.benerator.csv.WeightedDatasetCSVGenerator;
34  import com.rapiddweller.benerator.dataset.AbstractDatasetGenerator;
35  import com.rapiddweller.benerator.dataset.Dataset;
36  import com.rapiddweller.benerator.dataset.DatasetUtil;
37  import com.rapiddweller.benerator.primitive.RegexStringGenerator;
38  import com.rapiddweller.benerator.primitive.TokenCombiner;
39  import com.rapiddweller.benerator.sample.ConstantGenerator;
40  import com.rapiddweller.benerator.sample.SequencedCSVSampleGenerator;
41  import com.rapiddweller.benerator.util.ThreadSafeNonNullGenerator;
42  import com.rapiddweller.benerator.wrapper.AlternativeGenerator;
43  import com.rapiddweller.benerator.wrapper.MessageGenerator;
44  import com.rapiddweller.benerator.wrapper.ProductWrapper;
45  import com.rapiddweller.benerator.wrapper.WrapperFactory;
46  import com.rapiddweller.common.Assert;
47  import com.rapiddweller.common.ConfigurationError;
48  import com.rapiddweller.common.Encodings;
49  import com.rapiddweller.common.bean.PropertyAccessConverter;
50  import com.rapiddweller.domain.address.CityGenerator;
51  import com.rapiddweller.domain.address.Country;
52  import com.rapiddweller.domain.person.FamilyNameGenerator;
53  import com.rapiddweller.domain.person.Gender;
54  import com.rapiddweller.domain.person.GivenNameGenerator;
55  import com.rapiddweller.format.text.NameNormalizer;
56  import org.apache.logging.log4j.LogManager;
57  import org.apache.logging.log4j.Logger;
58  
59  import java.util.HashMap;
60  import java.util.Map;
61  
62  import static com.rapiddweller.benerator.util.GeneratorUtil.generateNonNull;
63  import static com.rapiddweller.benerator.util.GeneratorUtil.generateNullable;
64  
65  /**
66   * Generates company names.<br/><br/>
67   * Created: 14.03.2008 08:26:44
68   *
69   * @author Volker Bergmann
70   * @since 0.5.0
71   */
72  public class CompanyNameGenerator extends AbstractDatasetGenerator<CompanyName>
73      implements NonNullGenerator<CompanyName> {
74  
75    /**
76     * The constant LOGGER.
77     */
78    protected static final Logger LOGGER =
79        LogManager.getLogger(CompanyNameGenerator.class);
80  
81    private static final String ORG = "/com/rapiddweller/domain/organization/";
82  
83    /**
84     * The constant locationGenerators.
85     */
86    protected static final Map<String, Generator<String>> locationGenerators =
87        new HashMap<>();
88  
89    /**
90     * The Dataset name.
91     */
92    protected final String datasetName;
93    /**
94     * The Sector.
95     */
96    protected boolean sector;
97    /**
98     * The Location.
99     */
100   protected boolean location;
101   /**
102    * The Legal form.
103    */
104   protected boolean legalForm;
105 
106 
107   // Constructors ----------------------------------------------------------------------------------------------------
108 
109   /**
110    * Instantiates a new Company name generator.
111    */
112   public CompanyNameGenerator() {
113     this(true, true, true);
114   }
115 
116   /**
117    * Instantiates a new Company name generator.
118    *
119    * @param sector    the sector
120    * @param location  the location
121    * @param legalForm the legal form
122    */
123   public CompanyNameGenerator(boolean sector, boolean location,
124                               boolean legalForm) {
125     this(sector, location, legalForm, Country.getDefault().getIsoCode());
126   }
127 
128   /**
129    * Instantiates a new Company name generator.
130    *
131    * @param dataset the dataset
132    */
133   public CompanyNameGenerator(String dataset) {
134     this(true, true, true, dataset);
135   }
136 
137   /**
138    * Instantiates a new Company name generator.
139    *
140    * @param sector      the sector
141    * @param location    the location
142    * @param legalForm   the legal form
143    * @param datasetName the dataset name
144    */
145   public CompanyNameGenerator(boolean sector, boolean location,
146                               boolean legalForm, String datasetName) {
147     super(CompanyName.class, DatasetUtil.REGION_NESTING, datasetName, true);
148     LOGGER.debug("Creating instance of {} for dataset {}", getClass(),
149         datasetName);
150     this.sector = sector;
151     this.location = location;
152     this.legalForm = legalForm;
153     this.datasetName = datasetName;
154     setDataset(datasetName);
155   }
156 
157 
158   // properties -----------------------------------------------------------------------------------------------------------
159 
160   /**
161    * Is sector boolean.
162    *
163    * @return the boolean
164    */
165   public boolean isSector() {
166     return sector;
167   }
168 
169   /**
170    * Sets sector.
171    *
172    * @param sector the sector
173    */
174   public void setSector(boolean sector) {
175     this.sector = sector;
176   }
177 
178   /**
179    * Is location boolean.
180    *
181    * @return the boolean
182    */
183   public boolean isLocation() {
184     return location;
185   }
186 
187   /**
188    * Sets location.
189    *
190    * @param location the location
191    */
192   public void setLocation(boolean location) {
193     this.location = location;
194   }
195 
196   /**
197    * Is legal form boolean.
198    *
199    * @return the boolean
200    */
201   public boolean isLegalForm() {
202     return legalForm;
203   }
204 
205   /**
206    * Sets legal form.
207    *
208    * @param legalForm the legal form
209    */
210   public void setLegalForm(boolean legalForm) {
211     this.legalForm = legalForm;
212   }
213 
214 
215   // interface -------------------------------------------------------------------------------------------------------
216 
217   @Override
218   protected boolean isAtomic(Dataset dataset) {
219     Country country = Country.getInstance(dataset.getName(), false);
220     return (country != null);
221   }
222 
223   @Override
224   protected WeightedGenerator<CompanyName> createGeneratorForAtomicDataset(
225       Dataset dataset) {
226     String isoCode = dataset.getName();
227     Country country = Country.getInstance(isoCode, false);
228     if (country == null) {
229       throw new ConfigurationError("Unknown country code: " + isoCode);
230     }
231     return new CountryCompanyNameGenerator(country);
232   }
233 
234   @Override
235   public CompanyName generate() {
236     ProductWrapper<CompanyName> wrapper = generate(getResultWrapper());
237     return (wrapper != null ? wrapper.unwrap() : null);
238   }
239 
240 
241   // helper class ----------------------------------------------------------------------------------------------------
242 
243   /**
244    * The type Country company name generator.
245    */
246   class CountryCompanyNameGenerator
247       extends ThreadSafeNonNullGenerator<CompanyName>
248       implements WeightedGenerator<CompanyName> {
249 
250     private final Country country;
251     private AlternativeGenerator<String> shortNameGenerator;
252     private SectorGenerator sectorGenerator;
253     private WeightedDatasetCSVGenerator<String> legalFormGenerator;
254     private Generator<String> locationGenerator;
255 
256     /**
257      * Instantiates a new Country company name generator.
258      *
259      * @param country the country
260      */
261     public CountryCompanyNameGenerator(Country country) {
262       Assert.notNull(country, "country");
263       this.country = country;
264     }
265 
266     @Override
267     public Class<CompanyName> getGeneratedType() {
268       return CompanyName.class;
269     }
270 
271     @Override
272     public synchronized void init(GeneratorContext context) {
273       try {
274         super.init(context);
275         initWithDataset(country.getIsoCode(), context);
276       } catch (Exception e) {
277         String fallbackDataset = DatasetUtil.fallbackRegionName();
278         LOGGER.warn(
279             "Error initializing location generator for dataset " +
280                 datasetName + ", falling back to " +
281                 fallbackDataset);
282         initWithDataset(fallbackDataset, context);
283       }
284     }
285 
286     /**
287      * Init with dataset.
288      *
289      * @param datasetToUse the dataset to use
290      * @param context      the context
291      */
292     public void initWithDataset(String datasetToUse,
293                                 GeneratorContext context) {
294       createAndInitLocationGenerator(datasetToUse);
295       initLegalFormGenerator(datasetToUse);
296       initSectorGenerator(datasetToUse);
297       createAndInitShortNameGenerator(datasetToUse, context);
298       super.init(context);
299     }
300 
301     @Override
302     public CompanyName generate() {
303       CompanyNameation/CompanyName.html#CompanyName">CompanyName name = new CompanyName();
304       name.setShortName(generateNonNull(shortNameGenerator));
305 
306       if (sectorGenerator != null) {
307         String sector = generateNullable(sectorGenerator);
308         if (sector != null) {
309           name.setSector(sector);
310         }
311       }
312       if (locationGenerator != null) {
313         String location = generateNullable(locationGenerator);
314         if (location != null) {
315           name.setLocation(location);
316         }
317       }
318       if (legalFormGenerator != null) {
319         name.setLegalForm(generateNullable(legalFormGenerator));
320       }
321       name.setDatasetName(datasetName);
322       return name;
323     }
324 
325     @Override
326     public double getWeight() {
327       return country.getPopulation();
328     }
329 
330     @Override
331     public String toString() {
332       return getClass().getSimpleName() + '[' + datasetName + ']';
333     }
334 
335     // private helpers -------------------------------------------------------------------------------------------------
336 
337     private void createAndInitShortNameGenerator(String datasetToUse,
338                                                  GeneratorContext context) {
339       shortNameGenerator = new AlternativeGenerator<>(String.class);
340       shortNameGenerator.addSource(createInitialsNameGenerator());
341       addSourceIfNotNull(createPersonNameGenerator(datasetToUse),
342           shortNameGenerator);
343       addSourceIfNotNull(createArtificialNameGenerator(),
344           shortNameGenerator);
345       addSourceIfNotNull(createTechNameGenerator(), shortNameGenerator);
346       shortNameGenerator.init(context);
347     }
348 
349     private void addSourceIfNotNull(Generator<String> source,
350                                     AlternativeGenerator<String> master) {
351       if (source != null) {
352         master.addSource(source);
353       }
354     }
355 
356     private RegexStringGenerator createInitialsNameGenerator() {
357       return new RegexStringGenerator("[A-Z]{3}");
358     }
359 
360     private MessageGenerator createTechNameGenerator() {
361       try {
362         return new MessageGenerator("{0}{1}",
363             new SequencedCSVSampleGenerator<String>(
364                 ORG + "tech1.csv"),
365             new SequencedCSVSampleGenerator<String>(
366                 ORG + "tech2.csv")
367         );
368       } catch (Exception e) {
369         LOGGER.info("Cannot create technical company name generator: " +
370             e.getMessage());
371         return null;
372       }
373     }
374 
375     private TokenCombiner createArtificialNameGenerator() {
376       try {
377         return new TokenCombiner(ORG + "artificialName.csv", false, '-',
378             Encodings.UTF_8, false);
379       } catch (Exception e) {
380         LOGGER.info(
381             "Cannot create artificial company name generator: " +
382                 e.getMessage());
383         return null;
384       }
385     }
386 
387     private MessageGenerator createPersonNameGenerator(
388         String datasetToUse) {
389       try {
390         return new MessageGenerator("{0} {1}",
391             GivenNameGenerator
392                 .sharedInstance(datasetToUse, Gender.MALE),
393             FamilyNameGenerator.sharedInstance(datasetToUse)
394         );
395       } catch (Exception e) {
396         LOGGER.info(
397             "Cannot create person-based company name generator: " +
398                 e.getMessage());
399         return null;
400       }
401     }
402 
403     private void initSectorGenerator(String datasetName) {
404       if (sector) {
405         try {
406           Country country = Country.getInstance(datasetName);
407           sectorGenerator = new SectorGenerator(
408               country.getDefaultLanguageLocale());
409           sectorGenerator.init(context);
410         } catch (Exception e) {
411           if ("US".equals(datasetName)) {
412             throw new ConfigurationError(
413                 "Failed to initialize SectorGenerator with US dataset",
414                 e);
415           }
416           LOGGER.info("Cannot create sector generator: " +
417               e.getMessage() + ". Falling back to US");
418           initSectorGenerator("US");
419         }
420       }
421     }
422 
423     private void initLegalFormGenerator(String datasetName) {
424       if (legalForm) {
425 
426         try {
427           legalFormGenerator = new LegalFormGenerator(datasetName);
428           legalFormGenerator.init(context);
429         } catch (Exception e) {
430           LOGGER.error("Cannot create legal form generator: " +
431               e.getMessage() + ". Falling back to US. ");
432           initLegalFormGenerator("US");
433         }
434       }
435     }
436 
437     private void createAndInitLocationGenerator(String datasetName) {
438       locationGenerator = locationGenerators.get(datasetName);
439       if (locationGenerator == null) {
440         double nullQuota = 0.8;
441         Country country = Country.getInstance(datasetName);
442         Generator<String> locationBaseGen;
443         if (location && country != null) {
444           try {
445             Generator<String> cityGen =
446                 WrapperFactory.applyConverter(
447                     new CityGenerator(country.getIsoCode()),
448                     new PropertyAccessConverter("name"),
449                     new NameNormalizer());
450             if (DatasetUtil.getDataset(DatasetUtil.REGION_NESTING,
451                 datasetName).isAtomic()) {
452               locationBaseGen =
453                   new AlternativeGenerator<>(String.class,
454                       new ConstantGenerator<>(
455                           country.getLocalName()),
456                       cityGen);
457             } else {
458               locationBaseGen = cityGen;
459             }
460           } catch (Exception e) {
461             LOGGER.info("Cannot create location generator: " +
462                 e.getMessage());
463             locationBaseGen = new ConstantGenerator<>(null);
464           }
465         } else {
466           locationBaseGen = new ConstantGenerator<>(null);
467         }
468         locationGenerator =
469             WrapperFactory.injectNulls(locationBaseGen, nullQuota);
470         locationGenerator.init(context);
471         locationGenerators.put(datasetName, locationGenerator);
472       }
473     }
474 
475   }
476 
477 }