001/*
002 * Copyright 2016-2018 Ping Identity Corporation
003 * All Rights Reserved.
004 */
005/*
006 * Copyright (C) 2016-2018 Ping Identity Corporation
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021package com.unboundid.ldap.sdk.unboundidds.tools;
022
023
024
025import java.io.File;
026import java.io.FileOutputStream;
027import java.io.InputStream;
028import java.io.IOException;
029import java.io.OutputStream;
030import java.util.ArrayList;
031import java.util.Collections;
032import java.util.LinkedHashMap;
033import java.util.LinkedHashSet;
034import java.util.List;
035import java.util.Map;
036import java.util.Set;
037import java.util.TreeMap;
038import java.util.concurrent.atomic.AtomicLong;
039import java.util.zip.GZIPOutputStream;
040
041import com.unboundid.ldap.sdk.Filter;
042import com.unboundid.ldap.sdk.LDAPException;
043import com.unboundid.ldap.sdk.ResultCode;
044import com.unboundid.ldap.sdk.Version;
045import com.unboundid.ldap.sdk.schema.Schema;
046import com.unboundid.ldif.LDIFException;
047import com.unboundid.ldif.LDIFReader;
048import com.unboundid.util.ByteStringBuffer;
049import com.unboundid.util.CommandLineTool;
050import com.unboundid.util.Debug;
051import com.unboundid.util.ObjectPair;
052import com.unboundid.util.PassphraseEncryptedOutputStream;
053import com.unboundid.util.StaticUtils;
054import com.unboundid.util.ThreadSafety;
055import com.unboundid.util.ThreadSafetyLevel;
056import com.unboundid.util.args.ArgumentException;
057import com.unboundid.util.args.ArgumentParser;
058import com.unboundid.util.args.BooleanArgument;
059import com.unboundid.util.args.DNArgument;
060import com.unboundid.util.args.FileArgument;
061import com.unboundid.util.args.FilterArgument;
062import com.unboundid.util.args.IntegerArgument;
063import com.unboundid.util.args.SubCommand;
064import com.unboundid.util.args.StringArgument;
065
066import static com.unboundid.ldap.sdk.unboundidds.tools.ToolMessages.*;
067
068
069
070/**
071 * This class provides a command-line tool that can be used to split an LDIF
072 * file below a specified base DN.  This can be used to help initialize an
073 * entry-balancing deployment for use with the Directory Proxy Server.
074 * <BR>
075 * <BLOCKQUOTE>
076 *   <B>NOTE:</B>  This class, and other classes within the
077 *   {@code com.unboundid.ldap.sdk.unboundidds} package structure, are only
078 *   supported for use against Ping Identity, UnboundID, and
079 *   Nokia/Alcatel-Lucent 8661 server products.  These classes provide support
080 *   for proprietary functionality or for external specifications that are not
081 *   considered stable or mature enough to be guaranteed to work in an
082 *   interoperable way with other types of LDAP servers.
083 * </BLOCKQUOTE>
084 * <BR>
085 * It supports a number of algorithms for determining how to split the data,
086 * including:
087 * <UL>
088 *   <LI>
089 *     split-using-hash-on-rdn -- The tool will compute a digest of the DN
090 *     component that is immediately below the split base DN, and will use a
091 *     modulus to select a backend set for a given entry.  Since the split is
 *     based purely on computation involving the DN, there is no need for
093 *     caching to ensure that children are placed in the same sets as their
094 *     parent, which allows it to run effectively with a small memory footprint.
095 *   </LI>
096 *   <LI>
097 *     split-using-hash-on-attribute -- The tool will compute a digest of the
098 *     value(s) of a specified attribute, and will use a modulus to select a
099 *     backend set for a given entry.  This hash will only be computed for
100 *     entries immediately below the split base DN, and a cache will be used to
101 *     ensure that entries more than one level below the split base DN are
102 *     placed in the same backend set as their parent.
103 *   </LI>
104 *   <LI>
105 *     split-using-fewest-entries -- When examining an entry immediately below
106 *     the split base DN, the tool will place that entry in the set that has the
107 *     fewest entries.  For flat DITs in which entries only exist one level
108 *     below the split base DN, this will effectively ensure a round-robin
109 *     distribution.  But for cases in which there are branches of varying sizes
110 *     below the split base DN, this can help ensure that entries are more
111 *     evenly distributed across backend sets.  A cache will be used to ensure
112 *     that entries more than one level below the split base DN are placed in
113 *     the same backend set as their parent.
114 *   </LI>
115 *   <LI>
116 *     split-using-filter -- When examining an entry immediately below the split
 *     base DN, a series of filters will be evaluated against that entry, with
 *     each filter associated with a specific backend set.  If an entry doesn't
119 *     match any of the provided filters, an RDN hash can be used to select the
120 *     set.  A cache will be used to ensure that entries more than one level
121 *     below the split base DN are placed in the same backend set as their
122 *     parent.
123 *   </LI>
124 * </UL>
125 */
126@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
127public final class SplitLDIF
128     extends CommandLineTool
129{
  /**
   * The maximum length of any message to write to standard output or standard
   * error.
   */
  private static final int MAX_OUTPUT_LINE_LENGTH =
       StaticUtils.TERMINAL_WIDTH_COLUMNS - 1;



  // The global arguments used by this tool.  These apply to every subcommand
  // and are initialized in addToolArguments.
  private BooleanArgument addEntriesOutsideSplitBaseDNToAllSets = null;
  private BooleanArgument addEntriesOutsideSplitBaseDNToDedicatedSet = null;
  private BooleanArgument compressTarget = null;
  private BooleanArgument encryptTarget = null;
  private BooleanArgument sourceCompressed = null;
  private DNArgument splitBaseDN = null;
  private FileArgument encryptionPassphraseFile = null;
  private FileArgument schemaPath = null;
  private FileArgument sourceLDIF = null;
  private FileArgument targetLDIFBasePath = null;
  private IntegerArgument numThreads = null;

  // The arguments used to split using a hash of the RDN.
  private IntegerArgument splitUsingHashOnRDNNumSets = null;
  private SubCommand splitUsingHashOnRDN = null;

  // The arguments used to split using a hash on a specified attribute.
  private BooleanArgument splitUsingHashOnAttributeAssumeFlatDIT = null;
  private BooleanArgument splitUsingHashOnAttributeUseAllValues = null;
  private IntegerArgument splitUsingHashOnAttributeNumSets = null;
  private StringArgument splitUsingHashOnAttributeAttributeName = null;
  private SubCommand splitUsingHashOnAttribute = null;

  // The arguments used to choose the set with the fewest entries.
  private BooleanArgument splitUsingFewestEntriesAssumeFlatDIT = null;
  private IntegerArgument splitUsingFewestEntriesNumSets = null;
  private SubCommand splitUsingFewestEntries = null;

  // The arguments used to choose the set using a provided set of filters.
  private BooleanArgument splitUsingFilterAssumeFlatDIT = null;
  private FilterArgument splitUsingFilterFilter = null;
  private SubCommand splitUsingFilter = null;
172
173
174
175  /**
176   * Runs the tool with the provided set of command-line arguments.
177   *
178   * @param  args  The command-line arguments provided to this tool.
179   */
180  public static void main(final String... args)
181  {
182    final ResultCode resultCode = main(System.out, System.err, args);
183    if (resultCode != ResultCode.SUCCESS)
184    {
185      System.exit(resultCode.intValue());
186    }
187  }
188
189
190
191  /**
192   * Runs the tool with the provided set of command-line arguments.
193   *
194   * @param  out   The output stream used for standard output.  It may be
195   *               {@code null} if standard output should be suppressed.
196   * @param  err   The output stream used for standard error.  It may be
197   *               {@code null} if standard error should be suppressed.
198   * @param  args  The command-line arguments provided to this tool.
199   *
200   * @return  A result code with information about the processing performed.
201   *          Any result code other than {@link ResultCode#SUCCESS} indicates
202   *          that an error occurred.
203   */
204  public static ResultCode main(final OutputStream out, final OutputStream err,
205                                final String... args)
206  {
207    final SplitLDIF tool = new SplitLDIF(out, err);
208    return tool.runTool(args);
209  }
210
211
212
213  /**
214   * Creates a new instance of this tool with the provided information.
215   *
216   * @param  out  The output stream used for standard output.  It may be
217   *              {@code null} if standard output should be suppressed.
218   * @param  err  The output stream used for standard error.  It may be
219   *              {@code null} if standard error should be suppressed.
220   */
  public SplitLDIF(final OutputStream out, final OutputStream err)
  {
    // All argument fields are initialized later, in addToolArguments.
    super(out, err);
  }
225
226
227
228  /**
229   * {@inheritDoc}
230   */
  @Override()
  public String getToolName()
  {
    // The name used to invoke this tool on the command line.
    return "split-ldif";
  }
236
237
238
239  /**
240   * {@inheritDoc}
241   */
  @Override()
  public String getToolDescription()
  {
    // The human-readable description is obtained from the localized messages.
    return INFO_SPLIT_LDIF_TOOL_DESCRIPTION.get();
  }
247
248
249
250  /**
251   * {@inheritDoc}
252   */
  @Override()
  public String getToolVersion()
  {
    // The tool version tracks the LDAP SDK release version.
    return Version.NUMERIC_VERSION_STRING;
  }
258
259
260
261  /**
262   * {@inheritDoc}
263   */
  @Override()
  public boolean supportsInteractiveMode()
  {
    // The tool can prompt for arguments when none are provided.
    return true;
  }
269
270
271
272  /**
273   * {@inheritDoc}
274   */
  @Override()
  public boolean defaultsToInteractiveMode()
  {
    // Launch in interactive mode when invoked without any arguments.
    return true;
  }
280
281
282
283  /**
284   * {@inheritDoc}
285   */
  @Override()
  public boolean supportsPropertiesFile()
  {
    // Argument values may be read from a properties file.
    return true;
  }
291
292
293
294  /**
295   * {@inheritDoc}
296   */
297  @Override()
298  public void addToolArguments(final ArgumentParser parser)
299         throws ArgumentException
300  {
301    // Add the global arguments.
302    sourceLDIF = new FileArgument('l', "sourceLDIF", true, 0, null,
303         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_LDIF.get(), true, false, true,
304         false);
305    sourceLDIF.addLongIdentifier("inputLDIF", true);
306    sourceLDIF.addLongIdentifier("source-ldif", true);
307    sourceLDIF.addLongIdentifier("input-ldif", true);
308    parser.addArgument(sourceLDIF);
309
310    sourceCompressed = new BooleanArgument('C', "sourceCompressed",
311         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_COMPRESSED.get());
312    sourceCompressed.addLongIdentifier("inputCompressed", true);
313    sourceCompressed.addLongIdentifier("source-compressed", true);
314    sourceCompressed.addLongIdentifier("input-compressed", true);
315    parser.addArgument(sourceCompressed);
316
317    targetLDIFBasePath = new FileArgument('o', "targetLDIFBasePath", false, 1,
318         null, INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_TARGET_LDIF_BASE.get(), false,
319         true, true, false);
320    targetLDIFBasePath.addLongIdentifier("outputLDIFBasePath", true);
321    targetLDIFBasePath.addLongIdentifier("target-ldif-base-path", true);
322    targetLDIFBasePath.addLongIdentifier("output-ldif-base-path", true);
323    parser.addArgument(targetLDIFBasePath);
324
325    compressTarget = new BooleanArgument('c', "compressTarget",
326         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_COMPRESS_TARGET.get());
327    compressTarget.addLongIdentifier("compressOutput", true);
328    compressTarget.addLongIdentifier("compress", true);
329    compressTarget.addLongIdentifier("compress-target", true);
330    compressTarget.addLongIdentifier("compress-output", true);
331    parser.addArgument(compressTarget);
332
333    encryptTarget = new BooleanArgument(null, "encryptTarget",
334         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_ENCRYPT_TARGET.get());
335    encryptTarget.addLongIdentifier("encryptOutput", true);
336    encryptTarget.addLongIdentifier("encrypt", true);
337    encryptTarget.addLongIdentifier("encrypt-target", true);
338    encryptTarget.addLongIdentifier("encrypt-output", true);
339    parser.addArgument(encryptTarget);
340
341    encryptionPassphraseFile = new FileArgument(null,
342         "encryptionPassphraseFile", false, 1, null,
343         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_ENCRYPT_PW_FILE.get(), true, true,
344         true, false);
345    encryptionPassphraseFile.addLongIdentifier("encryptionPasswordFile", true);
346    encryptionPassphraseFile.addLongIdentifier("encryption-passphrase-file",
347         true);
348    encryptionPassphraseFile.addLongIdentifier("encryption-password-file",
349         true);
350    parser.addArgument(encryptionPassphraseFile);
351
352    splitBaseDN = new DNArgument('b', "splitBaseDN", true, 1, null,
353         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SPLIT_BASE_DN.get());
354    splitBaseDN.addLongIdentifier("baseDN", true);
355    splitBaseDN.addLongIdentifier("split-base-dn", true);
356    splitBaseDN.addLongIdentifier("base-dn", true);
357    parser.addArgument(splitBaseDN);
358
359    addEntriesOutsideSplitBaseDNToAllSets = new BooleanArgument(null,
360         "addEntriesOutsideSplitBaseDNToAllSets", 1,
361         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_ALL_SETS.get());
362    addEntriesOutsideSplitBaseDNToAllSets.addLongIdentifier(
363         "add-entries-outside-split-base-dn-to-all-sets", true);
364    parser.addArgument(addEntriesOutsideSplitBaseDNToAllSets);
365
366    addEntriesOutsideSplitBaseDNToDedicatedSet = new BooleanArgument(null,
367         "addEntriesOutsideSplitBaseDNToDedicatedSet", 1,
368         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_DEDICATED_SET.get());
369    addEntriesOutsideSplitBaseDNToDedicatedSet.addLongIdentifier(
370         "add-entries-outside-split-base-dn-to-dedicated-set", true);
371    parser.addArgument(addEntriesOutsideSplitBaseDNToDedicatedSet);
372
373    schemaPath = new FileArgument(null, "schemaPath", false, 0, null,
374         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SCHEMA_PATH.get(), true, false, false,
375         false);
376    schemaPath.addLongIdentifier("schemaFile", true);
377    schemaPath.addLongIdentifier("schemaDirectory", true);
378    schemaPath.addLongIdentifier("schema-path", true);
379    schemaPath.addLongIdentifier("schema-file", true);
380    schemaPath.addLongIdentifier("schema-directory", true);
381    parser.addArgument(schemaPath);
382
383    numThreads = new IntegerArgument('t', "numThreads", false, 1, null,
384         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_NUM_THREADS.get(), 1,
385         Integer.MAX_VALUE, 1);
386    numThreads.addLongIdentifier("num-threads", true);
387    parser.addArgument(numThreads);
388
389
390    // Add the subcommand used to split entries using a hash on the RDN.
391    final ArgumentParser splitUsingHashOnRDNParser = new ArgumentParser(
392         "split-using-hash-on-rdn", INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get());
393
394    splitUsingHashOnRDNNumSets = new IntegerArgument(null, "numSets", true, 1,
395         null, INFO_SPLIT_LDIF_SC_HASH_ON_RDN_ARG_DESC_NUM_SETS.get(), 2,
396         Integer.MAX_VALUE);
397    splitUsingHashOnRDNNumSets.addLongIdentifier("num-sets", true);
398    splitUsingHashOnRDNParser.addArgument(splitUsingHashOnRDNNumSets);
399
400    final LinkedHashMap<String[],String> splitUsingHashOnRDNExamples =
401         new LinkedHashMap<>(1);
402    splitUsingHashOnRDNExamples.put(
403         new String[]
404         {
405           "split-using-hash-on-rdn",
406           "--sourceLDIF", "whole.ldif",
407           "--targetLDIFBasePath", "split.ldif",
408           "--splitBaseDN", "ou=People,dc=example,dc=com",
409           "--numSets", "4",
410           "--schemaPath", "config/schema",
411           "--addEntriesOutsideSplitBaseDNToAllSets"
412         },
413         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_EXAMPLE.get());
414
415    splitUsingHashOnRDN = new SubCommand("split-using-hash-on-rdn",
416         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get(), splitUsingHashOnRDNParser,
417         splitUsingHashOnRDNExamples);
418    splitUsingHashOnRDN.addName("hash-on-rdn", true);
419
420    parser.addSubCommand(splitUsingHashOnRDN);
421
422
423    // Add the subcommand used to split entries using a hash on a specified
424    // attribute.
425    final ArgumentParser splitUsingHashOnAttributeParser = new ArgumentParser(
426         "split-using-hash-on-attribute",
427         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get());
428
429    splitUsingHashOnAttributeAttributeName = new StringArgument(null,
430         "attributeName", true, 1, "{attr}",
431         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ATTR_NAME.get());
432    splitUsingHashOnAttributeAttributeName.addLongIdentifier("attribute-name",
433         true);
434    splitUsingHashOnAttributeParser.addArgument(
435         splitUsingHashOnAttributeAttributeName);
436
437    splitUsingHashOnAttributeNumSets = new IntegerArgument(null, "numSets",
438         true, 1, null, INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_NUM_SETS.get(),
439         2, Integer.MAX_VALUE);
440    splitUsingHashOnAttributeNumSets.addLongIdentifier("num-sets", true);
441    splitUsingHashOnAttributeParser.addArgument(
442         splitUsingHashOnAttributeNumSets);
443
444    splitUsingHashOnAttributeUseAllValues = new BooleanArgument(null,
445         "useAllValues", 1,
446         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ALL_VALUES.get());
447    splitUsingHashOnAttributeUseAllValues.addLongIdentifier("use-all-values",
448         true);
449    splitUsingHashOnAttributeParser.addArgument(
450         splitUsingHashOnAttributeUseAllValues);
451
452    splitUsingHashOnAttributeAssumeFlatDIT = new BooleanArgument(null,
453         "assumeFlatDIT", 1,
454         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ASSUME_FLAT_DIT.get());
455    splitUsingHashOnAttributeAssumeFlatDIT.addLongIdentifier("assume-flat-dit",
456         true);
457    splitUsingHashOnAttributeParser.addArgument(
458         splitUsingHashOnAttributeAssumeFlatDIT);
459
460    final LinkedHashMap<String[],String> splitUsingHashOnAttributeExamples =
461         new LinkedHashMap<>(1);
462    splitUsingHashOnAttributeExamples.put(
463         new String[]
464         {
465           "split-using-hash-on-attribute",
466           "--sourceLDIF", "whole.ldif",
467           "--targetLDIFBasePath", "split.ldif",
468           "--splitBaseDN", "ou=People,dc=example,dc=com",
469           "--attributeName", "uid",
470           "--numSets", "4",
471           "--schemaPath", "config/schema",
472           "--addEntriesOutsideSplitBaseDNToAllSets"
473         },
474         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_EXAMPLE.get());
475
476    splitUsingHashOnAttribute = new SubCommand("split-using-hash-on-attribute",
477         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get(),
478         splitUsingHashOnAttributeParser, splitUsingHashOnAttributeExamples);
479    splitUsingHashOnAttribute.addName("hash-on-attribute", true);
480
481    parser.addSubCommand(splitUsingHashOnAttribute);
482
483
484    // Add the subcommand used to split entries by selecting the set with the
485    // fewest entries.
486    final ArgumentParser splitUsingFewestEntriesParser = new ArgumentParser(
487         "split-using-fewest-entries",
488         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get());
489
490    splitUsingFewestEntriesNumSets = new IntegerArgument(null, "numSets",
491         true, 1, null,
492         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_NUM_SETS.get(),
493         2, Integer.MAX_VALUE);
494    splitUsingFewestEntriesNumSets.addLongIdentifier("num-sets", true);
495    splitUsingFewestEntriesParser.addArgument(splitUsingFewestEntriesNumSets);
496
497    splitUsingFewestEntriesAssumeFlatDIT = new BooleanArgument(null,
498         "assumeFlatDIT", 1,
499         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_ASSUME_FLAT_DIT.get());
500    splitUsingFewestEntriesAssumeFlatDIT.addLongIdentifier("assume-flat-dit",
501         true);
502    splitUsingFewestEntriesParser.addArgument(
503         splitUsingFewestEntriesAssumeFlatDIT);
504
505    final LinkedHashMap<String[],String> splitUsingFewestEntriesExamples =
506         new LinkedHashMap<>(1);
507    splitUsingFewestEntriesExamples.put(
508         new String[]
509         {
510           "split-using-fewest-entries",
511           "--sourceLDIF", "whole.ldif",
512           "--targetLDIFBasePath", "split.ldif",
513           "--splitBaseDN", "ou=People,dc=example,dc=com",
514           "--numSets", "4",
515           "--schemaPath", "config/schema",
516           "--addEntriesOutsideSplitBaseDNToAllSets"
517         },
518         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_EXAMPLE.get());
519
520    splitUsingFewestEntries = new SubCommand("split-using-fewest-entries",
521         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get(),
522         splitUsingFewestEntriesParser, splitUsingFewestEntriesExamples);
523    splitUsingFewestEntries.addName("fewest-entries", true);
524
525    parser.addSubCommand(splitUsingFewestEntries);
526
527
528    // Add the subcommand used to split entries by selecting the set based on a
529    // filter.
530    final ArgumentParser splitUsingFilterParser = new ArgumentParser(
531         "split-using-filter", INFO_SPLIT_LDIF_SC_FILTER_DESC.get());
532
533    splitUsingFilterFilter = new FilterArgument(null, "filter", true, 0, null,
534         INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_FILTER.get());
535    splitUsingFilterParser.addArgument(splitUsingFilterFilter);
536
537    splitUsingFilterAssumeFlatDIT = new BooleanArgument(null, "assumeFlatDIT",
538         1, INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_ASSUME_FLAT_DIT.get());
539    splitUsingFilterAssumeFlatDIT.addLongIdentifier("assume-flat-dit", true);
540    splitUsingFilterParser.addArgument(splitUsingFilterAssumeFlatDIT);
541
542    final LinkedHashMap<String[],String> splitUsingFilterExamples =
543         new LinkedHashMap<>(1);
544    splitUsingFilterExamples.put(
545         new String[]
546         {
547           "split-using-filter",
548           "--sourceLDIF", "whole.ldif",
549           "--targetLDIFBasePath", "split.ldif",
550           "--splitBaseDN", "ou=People,dc=example,dc=com",
551           "--filter", "(timeZone=Eastern)",
552           "--filter", "(timeZone=Central)",
553           "--filter", "(timeZone=Mountain)",
554           "--filter", "(timeZone=Pacific)",
555           "--schemaPath", "config/schema",
556           "--addEntriesOutsideSplitBaseDNToAllSets"
557         },
558         INFO_SPLIT_LDIF_SC_FILTER_EXAMPLE.get());
559
560    splitUsingFilter = new SubCommand("split-using-filter",
561         INFO_SPLIT_LDIF_SC_FILTER_DESC.get(),
562         splitUsingFilterParser, splitUsingFilterExamples);
563    splitUsingFilter.addName("filter", true);
564
565    parser.addSubCommand(splitUsingFilter);
566  }
567
568
569
570  /**
571   * {@inheritDoc}
572   */
573  @Override()
574  public void doExtendedArgumentValidation()
575         throws ArgumentException
576  {
577    // If multiple sourceLDIF values were provided, then a target LDIF base path
578    // must have been given.
579    final List<File> sourceLDIFValues = sourceLDIF.getValues();
580    if (sourceLDIFValues.size() > 1)
581    {
582      if (! targetLDIFBasePath.isPresent())
583      {
584        throw new ArgumentException(ERR_SPLIT_LDIF_NO_TARGET_BASE_PATH.get(
585             sourceLDIF.getIdentifierString(),
586             targetLDIFBasePath.getIdentifierString()));
587      }
588    }
589
590
591    // If the split-using-filter subcommand was provided, then at least two
592    // filters must have been provided, and none of the filters can be logically
593    // equivalent to any of the others.
594    if (splitUsingFilter.isPresent())
595    {
596      final List<Filter> filterList = splitUsingFilterFilter.getValues();
597      final Set<Filter> filterSet =
598           new LinkedHashSet<>(filterList.size());
599      for (final Filter f : filterList)
600      {
601        if (filterSet.contains(f))
602        {
603          throw new ArgumentException(ERR_SPLIT_LDIF_NON_UNIQUE_FILTER.get(
604               splitUsingFilterFilter.getIdentifierString(), f.toString()));
605        }
606        else
607        {
608          filterSet.add(f);
609        }
610      }
611
612      if (filterSet.size() < 2)
613      {
614        throw new ArgumentException(ERR_SPLIT_LDIF_NOT_ENOUGH_FILTERS.get(
615             splitUsingFilter.getPrimaryName(),
616             splitUsingFilterFilter.getIdentifierString()));
617      }
618    }
619  }
620
621
622
623  /**
624   * {@inheritDoc}
625   */
626  @Override()
627  public ResultCode doToolProcessing()
628  {
629    // Get the schema to use during processing.
630    final Schema schema;
631    try
632    {
633      schema = getSchema();
634    }
635    catch (final LDAPException le)
636    {
637      wrapErr(0, MAX_OUTPUT_LINE_LENGTH, le.getMessage());
638      return le.getResultCode();
639    }
640
641
642    // If an encryption passphrase file is provided, then get the passphrase
643    // from it.
644    String encryptionPassphrase = null;
645    if (encryptionPassphraseFile.isPresent())
646    {
647      try
648      {
649        encryptionPassphrase = ToolUtils.readEncryptionPassphraseFromFile(
650             encryptionPassphraseFile.getValue());
651      }
652      catch (final LDAPException e)
653      {
654        Debug.debugException(e);
655        wrapErr(0, MAX_OUTPUT_LINE_LENGTH, e.getMessage());
656        return e.getResultCode();
657      }
658    }
659
660
661    // Figure out which subcommand was selected, and create the appropriate
662    // translator to use to perform the processing.
663    final SplitLDIFTranslator translator;
664    if (splitUsingHashOnRDN.isPresent())
665    {
666      translator = new SplitLDIFRDNHashTranslator(splitBaseDN.getValue(),
667           splitUsingHashOnRDNNumSets.getValue(),
668           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
669           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
670    }
671    else if (splitUsingHashOnAttribute.isPresent())
672    {
673      translator = new SplitLDIFAttributeHashTranslator(splitBaseDN.getValue(),
674           splitUsingHashOnAttributeNumSets.getValue(),
675           splitUsingHashOnAttributeAttributeName.getValue(),
676           splitUsingHashOnAttributeUseAllValues.isPresent(),
677           splitUsingHashOnAttributeAssumeFlatDIT.isPresent(),
678           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
679           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
680    }
681    else if (splitUsingFewestEntries.isPresent())
682    {
683      translator = new SplitLDIFFewestEntriesTranslator(splitBaseDN.getValue(),
684           splitUsingFewestEntriesNumSets.getValue(),
685           splitUsingFewestEntriesAssumeFlatDIT.isPresent(),
686           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
687           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
688    }
689    else if (splitUsingFilter.isPresent())
690    {
691      final List<Filter> filterList = splitUsingFilterFilter.getValues();
692      final LinkedHashSet<Filter> filterSet =
693           new LinkedHashSet<>(filterList.size());
694      for (final Filter f : filterList)
695      {
696        filterSet.add(f);
697      }
698
699      translator = new SplitLDIFFilterTranslator(splitBaseDN.getValue(),
700           schema, filterSet, splitUsingFilterAssumeFlatDIT.isPresent(),
701           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
702           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
703    }
704    else
705    {
706      // This should never happen.
707      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
708           ERR_SPLIT_LDIF_CANNOT_DETERMINE_SPLIT_ALGORITHM.get(
709                splitUsingHashOnRDN.getPrimaryName() + ", " +
710                splitUsingHashOnAttribute.getPrimaryName() + ", " +
711                splitUsingFewestEntries.getPrimaryName() + ", " +
712                splitUsingFilter.getPrimaryName()));
713      return ResultCode.PARAM_ERROR;
714    }
715
716
717    // Create the LDIF reader.
718    final LDIFReader ldifReader;
719    try
720    {
721      final InputStream inputStream;
722      if (sourceLDIF.isPresent())
723      {
724        final ObjectPair<InputStream,String> p =
725             ToolUtils.getInputStreamForLDIFFiles(sourceLDIF.getValues(),
726                  encryptionPassphrase, getOut(), getErr());
727        inputStream = p.getFirst();
728        if ((encryptionPassphrase == null) && (p.getSecond() != null))
729        {
730          encryptionPassphrase = p.getSecond();
731        }
732      }
733      else
734      {
735        inputStream = System.in;
736      }
737
738      ldifReader = new LDIFReader(inputStream, numThreads.getValue(),
739           translator);
740      if (schema != null)
741      {
742        ldifReader.setSchema(schema);
743      }
744    }
745    catch (final Exception e)
746    {
747      Debug.debugException(e);
748      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
749           ERR_SPLIT_LDIF_ERROR_CREATING_LDIF_READER.get(
750                StaticUtils.getExceptionMessage(e)));
751      return ResultCode.LOCAL_ERROR;
752    }
753
754
755    // Iterate through and process all of the entries.
756    ResultCode resultCode = ResultCode.SUCCESS;
757    final LinkedHashMap<String,OutputStream> outputStreams =
758         new LinkedHashMap<>(10);
759    try
760    {
761      final AtomicLong entriesRead = new AtomicLong(0L);
762      final AtomicLong entriesExcluded = new AtomicLong(0L);
763      final TreeMap<String,AtomicLong> fileCounts = new TreeMap<>();
764
765readLoop:
766      while (true)
767      {
768        final SplitLDIFEntry entry;
769        try
770        {
771          entry = (SplitLDIFEntry) ldifReader.readEntry();
772        }
773        catch (final LDIFException le)
774        {
775          Debug.debugException(le);
776          resultCode = ResultCode.LOCAL_ERROR;
777
778          final File f = getOutputFile(SplitLDIFEntry.SET_NAME_ERRORS);
779          OutputStream s = outputStreams.get(SplitLDIFEntry.SET_NAME_ERRORS);
780          if (s == null)
781          {
782            try
783            {
784              s = new FileOutputStream(f);
785
786              if (encryptTarget.isPresent())
787              {
788                if (encryptionPassphrase == null)
789                {
790                  try
791                  {
792                    encryptionPassphrase =
793                         ToolUtils.promptForEncryptionPassphrase(false, true,
794                              getOut(), getErr());
795                  }
796                  catch (final LDAPException ex)
797                  {
798                    Debug.debugException(ex);
799                    wrapErr(0, MAX_OUTPUT_LINE_LENGTH, ex.getMessage());
800                    return ex.getResultCode();
801                  }
802                }
803
804                s = new PassphraseEncryptedOutputStream(encryptionPassphrase,
805                     s);
806              }
807
808              if (compressTarget.isPresent())
809              {
810                s = new GZIPOutputStream(s);
811              }
812
813              outputStreams.put(SplitLDIFEntry.SET_NAME_ERRORS, s);
814              fileCounts.put(SplitLDIFEntry.SET_NAME_ERRORS,
815                   new AtomicLong(0L));
816            }
817            catch (final Exception e)
818            {
819              Debug.debugException(e);
820              resultCode = ResultCode.LOCAL_ERROR;
821              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
822                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
823                        f.getAbsolutePath(),
824                        StaticUtils.getExceptionMessage(e)));
825              break readLoop;
826            }
827          }
828
829          final ByteStringBuffer buffer = new ByteStringBuffer();
830          buffer.append("# ");
831          buffer.append(le.getMessage());
832          buffer.append(StaticUtils.EOL_BYTES);
833
834          final List<String> dataLines = le.getDataLines();
835          if (dataLines != null)
836          {
837            for (final String dataLine : dataLines)
838            {
839              buffer.append(dataLine);
840              buffer.append(StaticUtils.EOL_BYTES);
841            }
842          }
843
844          buffer.append(StaticUtils.EOL_BYTES);
845
846          try
847          {
848            s.write(buffer.toByteArray());
849          }
850          catch (final Exception e)
851          {
852              Debug.debugException(e);
853              resultCode = ResultCode.LOCAL_ERROR;
854              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
855                   ERR_SPLIT_LDIF_ERROR_WRITING_ERROR_TO_FILE.get(
856                        le.getMessage(), f.getAbsolutePath(),
857                        StaticUtils.getExceptionMessage(e)));
858              break readLoop;
859          }
860
861          if (le.mayContinueReading())
862          {
863            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
864                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_RECOVERABLE.get(
865                      StaticUtils.getExceptionMessage(le)));
866            continue;
867          }
868          else
869          {
870            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
871                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_UNRECOVERABLE.get(
872                      StaticUtils.getExceptionMessage(le)));
873            break;
874          }
875        }
876        catch (final IOException ioe)
877        {
878          Debug.debugException(ioe);
879          resultCode = ResultCode.LOCAL_ERROR;
880          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
881               ERR_SPLIT_LDIF_IO_READ_ERROR.get(
882                    StaticUtils.getExceptionMessage(ioe)));
883          break;
884        }
885        catch (final Exception e)
886        {
887          Debug.debugException(e);
888          resultCode = ResultCode.LOCAL_ERROR;
889          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
890               ERR_SPLIT_LDIF_UNEXPECTED_READ_ERROR.get(
891                    StaticUtils.getExceptionMessage(e)));
892          break;
893        }
894
895        if (entry == null)
896        {
897          break;
898        }
899
900        final long readCount = entriesRead.incrementAndGet();
901        if ((readCount % 1000L) == 0)
902        {
903          // Even though we aren't done with this entry yet, we'll go ahead and
904          // log a progress message now because it's easier to do that now than
905          // to ensure that it's handled properly through all possible error
906          // conditions that need to be handled below.
907          wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
908               INFO_SPLIT_LDIF_PROGRESS.get(readCount));
909        }
910
911
912        // Get the set(s) to which the entry should be written.  If this is
913        // null (which could be the case as a result of a race condition when
914        // using multiple threads where processing for a child completes before
915        // processing for its parent, or as a result of a case in which a
916        // child is included without or before its parent), then try to see if
917        // we can get the sets by passing the entry through the translator.
918        Set<String> sets = entry.getSets();
919        byte[] ldifBytes = entry.getLDIFBytes();
920        if (sets == null)
921        {
922          try
923          {
924            sets = translator.translate(entry, 0L).getSets();
925          }
926          catch (final Exception e)
927          {
928            Debug.debugException(e);
929          }
930
931          if (sets == null)
932          {
933            final SplitLDIFEntry errorEntry =  translator.createEntry(entry,
934                 ERR_SPLIT_LDIF_ENTRY_WITHOUT_PARENT.get(
935                      entry.getDN(), splitBaseDN.getStringValue()),
936                 Collections.singleton(SplitLDIFEntry.SET_NAME_ERRORS));
937            ldifBytes = errorEntry.getLDIFBytes();
938            sets = errorEntry.getSets();
939          }
940        }
941
942
943        // If the entry shouldn't be written into any sets, then we don't need
944        // to do anything else.
945        if (sets.isEmpty())
946        {
947          entriesExcluded.incrementAndGet();
948          continue;
949        }
950
951
952        // Write the entry into each of the target sets, creating the output
953        // files if necessary.
954        for (final String set : sets)
955        {
956          if (set.equals(SplitLDIFEntry.SET_NAME_ERRORS))
957          {
958            // This indicates that an error was encountered during processing,
959            // so we'll update the result code to reflect that.
960            resultCode = ResultCode.LOCAL_ERROR;
961          }
962
963          final File f = getOutputFile(set);
964          OutputStream s = outputStreams.get(set);
965          if (s == null)
966          {
967            try
968            {
969              s = new FileOutputStream(f);
970
971              if (encryptTarget.isPresent())
972              {
973                if (encryptionPassphrase == null)
974                {
975                  try
976                  {
977                    encryptionPassphrase =
978                         ToolUtils.promptForEncryptionPassphrase(false, true,
979                              getOut(), getErr());
980                  }
981                  catch (final LDAPException ex)
982                  {
983                    Debug.debugException(ex);
984                    wrapErr(0, MAX_OUTPUT_LINE_LENGTH, ex.getMessage());
985                    return ex.getResultCode();
986                  }
987                }
988
989                s = new PassphraseEncryptedOutputStream(encryptionPassphrase,
990                     s);
991              }
992
993              if (compressTarget.isPresent())
994              {
995                s = new GZIPOutputStream(s);
996              }
997
998              outputStreams.put(set, s);
999              fileCounts.put(set, new AtomicLong(0L));
1000            }
1001            catch (final Exception e)
1002            {
1003              Debug.debugException(e);
1004              resultCode = ResultCode.LOCAL_ERROR;
1005              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
1006                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
1007                        f.getAbsolutePath(),
1008                        StaticUtils.getExceptionMessage(e)));
1009              break readLoop;
1010            }
1011          }
1012
1013          try
1014          {
1015            s.write(ldifBytes);
1016          }
1017          catch (final Exception e)
1018          {
1019              Debug.debugException(e);
1020              resultCode = ResultCode.LOCAL_ERROR;
1021              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
1022                   ERR_SPLIT_LDIF_ERROR_WRITING_TO_FILE.get(
1023                        entry.getDN(), f.getAbsolutePath(),
1024                        StaticUtils.getExceptionMessage(e)));
1025              break readLoop;
1026          }
1027
1028          fileCounts.get(set).incrementAndGet();
1029        }
1030      }
1031
1032
1033      // Processing is complete.  Summarize the processing that was performed.
1034      final long finalReadCount = entriesRead.get();
1035      if (finalReadCount > 1000L)
1036      {
1037        out();
1038      }
1039
1040      wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
1041           INFO_SPLIT_LDIF_PROCESSING_COMPLETE.get(finalReadCount));
1042
1043      final long excludedCount = entriesExcluded.get();
1044      if (excludedCount > 0L)
1045      {
1046        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
1047             INFO_SPLIT_LDIF_EXCLUDED_COUNT.get(excludedCount));
1048      }
1049
1050      for (final Map.Entry<String,AtomicLong> e : fileCounts.entrySet())
1051      {
1052        final File f = getOutputFile(e.getKey());
1053        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
1054             INFO_SPLIT_LDIF_COUNT_TO_FILE.get(e.getValue().get(),
1055                  f.getName()));
1056      }
1057    }
1058    finally
1059    {
1060      try
1061      {
1062        ldifReader.close();
1063      }
1064      catch (final Exception e)
1065      {
1066        Debug.debugException(e);
1067      }
1068
1069      for (final Map.Entry<String,OutputStream> e : outputStreams.entrySet())
1070      {
1071        try
1072        {
1073          e.getValue().close();
1074        }
1075        catch (final Exception ex)
1076        {
1077          Debug.debugException(ex);
1078          resultCode = ResultCode.LOCAL_ERROR;
1079          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
1080               ERR_SPLIT_LDIF_ERROR_CLOSING_FILE.get(
1081                    getOutputFile(e.getKey()),
1082                    StaticUtils.getExceptionMessage(ex)));
1083        }
1084      }
1085    }
1086
1087    return resultCode;
1088  }
1089
1090
1091
1092  /**
1093   * Retrieves the schema that should be used for processing.
1094   *
1095   * @return  The schema that was created.
1096   *
1097   * @throws  LDAPException  If a problem is encountered while retrieving the
1098   *                         schema.
1099   */
1100  private Schema getSchema()
1101          throws LDAPException
1102  {
1103    // If any schema paths were specified, then load the schema only from those
1104    // paths.
1105    if (schemaPath.isPresent())
1106    {
1107      final ArrayList<File> schemaFiles = new ArrayList<>(10);
1108      for (final File path : schemaPath.getValues())
1109      {
1110        if (path.isFile())
1111        {
1112          schemaFiles.add(path);
1113        }
1114        else
1115        {
1116          final TreeMap<String,File> fileMap = new TreeMap<>();
1117          for (final File schemaDirFile : path.listFiles())
1118          {
1119            final String name = schemaDirFile.getName();
1120            if (schemaDirFile.isFile() && name.toLowerCase().endsWith(".ldif"))
1121            {
1122              fileMap.put(name, schemaDirFile);
1123            }
1124          }
1125          schemaFiles.addAll(fileMap.values());
1126        }
1127      }
1128
1129      if (schemaFiles.isEmpty())
1130      {
1131        throw new LDAPException(ResultCode.PARAM_ERROR,
1132             ERR_SPLIT_LDIF_NO_SCHEMA_FILES.get(
1133                  schemaPath.getIdentifierString()));
1134      }
1135      else
1136      {
1137        try
1138        {
1139          return Schema.getSchema(schemaFiles);
1140        }
1141        catch (final Exception e)
1142        {
1143          Debug.debugException(e);
1144          throw new LDAPException(ResultCode.LOCAL_ERROR,
1145               ERR_SPLIT_LDIF_ERROR_LOADING_SCHEMA.get(
1146                    StaticUtils.getExceptionMessage(e)));
1147        }
1148      }
1149    }
1150    else
1151    {
1152      // If the INSTANCE_ROOT environment variable is set and it refers to a
1153      // directory that has a config/schema subdirectory that has one or more
1154      // schema files in it, then read the schema from that directory.
1155      try
1156      {
1157        final String instanceRootStr = System.getenv("INSTANCE_ROOT");
1158        if (instanceRootStr != null)
1159        {
1160          final File instanceRoot = new File(instanceRootStr);
1161          final File configDir = new File(instanceRoot, "config");
1162          final File schemaDir = new File(configDir, "schema");
1163          if (schemaDir.exists())
1164          {
1165            final TreeMap<String,File> fileMap = new TreeMap<>();
1166            for (final File schemaDirFile : schemaDir.listFiles())
1167            {
1168              final String name = schemaDirFile.getName();
1169              if (schemaDirFile.isFile() &&
1170                  name.toLowerCase().endsWith(".ldif"))
1171              {
1172                fileMap.put(name, schemaDirFile);
1173              }
1174            }
1175
1176            if (! fileMap.isEmpty())
1177            {
1178              return Schema.getSchema(new ArrayList<>(fileMap.values()));
1179            }
1180          }
1181        }
1182      }
1183      catch (final Exception e)
1184      {
1185        Debug.debugException(e);
1186      }
1187    }
1188
1189
1190    // If we've gotten here, then just return null and the tool will try to use
1191    // the default standard schema.
1192    return null;
1193  }
1194
1195
1196
1197  /**
1198   * Retrieves a file object that refers to an output file with the provided
1199   * extension.
1200   *
1201   * @param  extension  The extension to use for the file.
1202   *
1203   * @return  A file object that refers to an output file with the provided
1204   *          extension.
1205   */
1206  private File getOutputFile(final String extension)
1207  {
1208    final File baseFile;
1209    if (targetLDIFBasePath.isPresent())
1210    {
1211      baseFile = targetLDIFBasePath.getValue();
1212    }
1213    else
1214    {
1215      baseFile = sourceLDIF.getValue();
1216    }
1217
1218    return new File(baseFile.getAbsolutePath() + extension);
1219  }
1220
1221
1222
1223  /**
1224   * {@inheritDoc}
1225   */
1226  @Override()
1227  public LinkedHashMap<String[],String> getExampleUsages()
1228  {
1229    final LinkedHashMap<String[],String> exampleMap =
1230         new LinkedHashMap<>(4);
1231
1232    for (final Map.Entry<String[],String> e :
1233         splitUsingHashOnRDN.getExampleUsages().entrySet())
1234    {
1235      exampleMap.put(e.getKey(), e.getValue());
1236    }
1237
1238    for (final Map.Entry<String[],String> e :
1239         splitUsingHashOnAttribute.getExampleUsages().entrySet())
1240    {
1241      exampleMap.put(e.getKey(), e.getValue());
1242    }
1243
1244    for (final Map.Entry<String[],String> e :
1245         splitUsingFewestEntries.getExampleUsages().entrySet())
1246    {
1247      exampleMap.put(e.getKey(), e.getValue());
1248    }
1249
1250    for (final Map.Entry<String[],String> e :
1251         splitUsingFilter.getExampleUsages().entrySet())
1252    {
1253      exampleMap.put(e.getKey(), e.getValue());
1254    }
1255
1256    return exampleMap;
1257  }
1258}