001/*
002 * Copyright 2016-2017 Ping Identity Corporation
003 * All Rights Reserved.
004 */
005/*
006 * Copyright (C) 2016-2017 Ping Identity Corporation
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021package com.unboundid.ldap.sdk.unboundidds.tools;
022
023
024
025import java.io.ByteArrayInputStream;
026import java.io.File;
027import java.io.FileInputStream;
028import java.io.FileOutputStream;
029import java.io.InputStream;
030import java.io.IOException;
031import java.io.OutputStream;
032import java.util.ArrayList;
033import java.util.Collections;
034import java.util.LinkedHashMap;
035import java.util.LinkedHashSet;
036import java.util.List;
037import java.util.Map;
038import java.util.Set;
039import java.util.TreeMap;
040import java.util.concurrent.atomic.AtomicLong;
041import java.util.zip.GZIPInputStream;
042import java.util.zip.GZIPOutputStream;
043
044import com.unboundid.ldap.sdk.Filter;
045import com.unboundid.ldap.sdk.LDAPException;
046import com.unboundid.ldap.sdk.ResultCode;
047import com.unboundid.ldap.sdk.Version;
048import com.unboundid.ldap.sdk.schema.Schema;
049import com.unboundid.ldif.LDIFException;
050import com.unboundid.ldif.LDIFReader;
051import com.unboundid.util.ByteStringBuffer;
052import com.unboundid.util.CommandLineTool;
053import com.unboundid.util.AggregateInputStream;
054import com.unboundid.util.Debug;
055import com.unboundid.util.StaticUtils;
056import com.unboundid.util.ThreadSafety;
057import com.unboundid.util.ThreadSafetyLevel;
058import com.unboundid.util.args.ArgumentException;
059import com.unboundid.util.args.ArgumentParser;
060import com.unboundid.util.args.BooleanArgument;
061import com.unboundid.util.args.DNArgument;
062import com.unboundid.util.args.FileArgument;
063import com.unboundid.util.args.FilterArgument;
064import com.unboundid.util.args.IntegerArgument;
065import com.unboundid.util.args.SubCommand;
066import com.unboundid.util.args.StringArgument;
067
068import static com.unboundid.ldap.sdk.unboundidds.tools.ToolMessages.*;
069
070
071
072/**
073 * This class provides a command-line tool that can be used to split an LDIF
074 * file below a specified base DN.  This can be used to help initialize an
075 * entry-balancing deployment for use with the Directory Proxy Server.
076 * <BR>
077 * <BLOCKQUOTE>
078 *   <B>NOTE:</B>  This class, and other classes within the
079 *   {@code com.unboundid.ldap.sdk.unboundidds} package structure, are only
080 *   supported for use against Ping Identity, UnboundID, and Alcatel-Lucent 8661
081 *   server products.  These classes provide support for proprietary
082 *   functionality or for external specifications that are not considered stable
083 *   or mature enough to be guaranteed to work in an interoperable way with
084 *   other types of LDAP servers.
085 * </BLOCKQUOTE>
086 * <BR>
087 * It supports a number of algorithms for determining how to split the data,
088 * including:
089 * <UL>
090 *   <LI>
091 *     split-using-hash-on-rdn -- The tool will compute a digest of the DN
092 *     component that is immediately below the split base DN, and will use a
093 *     modulus to select a backend set for a given entry.  Since the split is
 *     based purely on computation involving the DN, there is no need for
095 *     caching to ensure that children are placed in the same sets as their
096 *     parent, which allows it to run effectively with a small memory footprint.
097 *   </LI>
098 *   <LI>
099 *     split-using-hash-on-attribute -- The tool will compute a digest of the
100 *     value(s) of a specified attribute, and will use a modulus to select a
101 *     backend set for a given entry.  This hash will only be computed for
102 *     entries immediately below the split base DN, and a cache will be used to
103 *     ensure that entries more than one level below the split base DN are
104 *     placed in the same backend set as their parent.
105 *   </LI>
106 *   <LI>
107 *     split-using-fewest-entries -- When examining an entry immediately below
108 *     the split base DN, the tool will place that entry in the set that has the
109 *     fewest entries.  For flat DITs in which entries only exist one level
110 *     below the split base DN, this will effectively ensure a round-robin
111 *     distribution.  But for cases in which there are branches of varying sizes
112 *     below the split base DN, this can help ensure that entries are more
113 *     evenly distributed across backend sets.  A cache will be used to ensure
114 *     that entries more than one level below the split base DN are placed in
115 *     the same backend set as their parent.
116 *   </LI>
117 *   <LI>
118 *     split-using-filter -- When examining an entry immediately below the split
 *     base DN, a series of filters will be evaluated against that entry, with
120 *     each filter associated with a specific backend set.  If an entry doesn't
121 *     match any of the provided filters, an RDN hash can be used to select the
122 *     set.  A cache will be used to ensure that entries more than one level
123 *     below the split base DN are placed in the same backend set as their
124 *     parent.
125 *   </LI>
126 * </UL>
127 */
128@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
129public final class SplitLDIF
130     extends CommandLineTool
131{
132  /**
133   * The maximum length of any message to write to standard output or standard
134   * error.
135   */
136  private static final int MAX_OUTPUT_LINE_LENGTH =
137       StaticUtils.TERMINAL_WIDTH_COLUMNS - 1;
138
139
140
141  // The global arguments used by this tool.
142  private BooleanArgument addEntriesOutsideSplitBaseDNToAllSets = null;
143  private BooleanArgument addEntriesOutsideSplitBaseDNToDedicatedSet = null;
144  private BooleanArgument compressTarget = null;
145  private BooleanArgument sourceCompressed = null;
146  private DNArgument splitBaseDN = null;
147  private FileArgument schemaPath = null;
148  private FileArgument sourceLDIF = null;
149  private FileArgument targetLDIFBasePath = null;
150  private IntegerArgument numThreads = null;
151
152  // The arguments used to split using a hash of the RDN.
153  private IntegerArgument splitUsingHashOnRDNNumSets = null;
154  private SubCommand splitUsingHashOnRDN = null;
155
156  // The arguments used to split using a hash on a specified attribute.
157  private BooleanArgument splitUsingHashOnAttributeAssumeFlatDIT = null;
158  private BooleanArgument splitUsingHashOnAttributeUseAllValues = null;
159  private IntegerArgument splitUsingHashOnAttributeNumSets = null;
160  private StringArgument splitUsingHashOnAttributeAttributeName = null;
161  private SubCommand splitUsingHashOnAttribute = null;
162
163  // The arguments used to choose the set with the fewest entries.
164  private BooleanArgument splitUsingFewestEntriesAssumeFlatDIT = null;
165  private IntegerArgument splitUsingFewestEntriesNumSets = null;
166  private SubCommand splitUsingFewestEntries = null;
167
168  // The arguments used to choose the set using a provided set of filters.
169  private BooleanArgument splitUsingFilterAssumeFlatDIT = null;
170  private FilterArgument splitUsingFilterFilter = null;
171  private SubCommand splitUsingFilter = null;
172
173
174
175  /**
176   * Runs the tool with the provided set of command-line arguments.
177   *
178   * @param  args  The command-line arguments provided to this tool.
179   */
180  public static void main(final String... args)
181  {
182    final ResultCode resultCode = main(System.out, System.err, args);
183    if (resultCode != ResultCode.SUCCESS)
184    {
185      System.exit(resultCode.intValue());
186    }
187  }
188
189
190
191  /**
192   * Runs the tool with the provided set of command-line arguments.
193   *
194   * @param  out   The output stream used for standard output.  It may be
195   *               {@code null} if standard output should be suppressed.
196   * @param  err   The output stream used for standard error.  It may be
197   *               {@code null} if standard error should be suppressed.
198   * @param  args  The command-line arguments provided to this tool.
199   *
200   * @return  A result code with information about the processing performed.
201   *          Any result code other than {@link ResultCode#SUCCESS} indicates
202   *          that an error occurred.
203   */
204  public static ResultCode main(final OutputStream out, final OutputStream err,
205                                final String... args)
206  {
207    final SplitLDIF tool = new SplitLDIF(out, err);
208    return tool.runTool(args);
209  }
210
211
212
213  /**
214   * Creates a new instance of this tool with the provided information.
215   *
216   * @param  out  The output stream used for standard output.  It may be
217   *              {@code null} if standard output should be suppressed.
218   * @param  err  The output stream used for standard error.  It may be
219   *              {@code null} if standard error should be suppressed.
220   */
221  public SplitLDIF(final OutputStream out, final OutputStream err)
222  {
223    super(out, err);
224  }
225
226
227
228  /**
229   * {@inheritDoc}
230   */
231  @Override()
232  public String getToolName()
233  {
234    return "split-ldif";
235  }
236
237
238
239  /**
240   * {@inheritDoc}
241   */
242  @Override()
243  public String getToolDescription()
244  {
245    return INFO_SPLIT_LDIF_TOOL_DESCRIPTION.get();
246  }
247
248
249
250  /**
251   * {@inheritDoc}
252   */
253  @Override()
254  public String getToolVersion()
255  {
256    return Version.NUMERIC_VERSION_STRING;
257  }
258
259
260
261  /**
262   * {@inheritDoc}
263   */
264  @Override()
265  public boolean supportsInteractiveMode()
266  {
267    return true;
268  }
269
270
271
272  /**
273   * {@inheritDoc}
274   */
275  @Override()
276  public boolean defaultsToInteractiveMode()
277  {
278    return true;
279  }
280
281
282
283  /**
284   * {@inheritDoc}
285   */
286  @Override()
287  public boolean supportsPropertiesFile()
288  {
289    return true;
290  }
291
292
293
294  /**
295   * {@inheritDoc}
296   */
297  @Override()
298  public void addToolArguments(final ArgumentParser parser)
299         throws ArgumentException
300  {
301    // Add the global arguments.
302    sourceLDIF = new FileArgument('l', "sourceLDIF", true, 0, null,
303         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_LDIF.get(), true, false, true,
304         false);
305    sourceLDIF.addLongIdentifier("inputLDIF");
306    sourceLDIF.addLongIdentifier("source-ldif");
307    sourceLDIF.addLongIdentifier("input-ldif");
308    parser.addArgument(sourceLDIF);
309
310    sourceCompressed = new BooleanArgument('C', "sourceCompressed",
311         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_COMPRESSED.get());
312    sourceCompressed.addLongIdentifier("inputCompressed");
313    sourceCompressed.addLongIdentifier("source-compressed");
314    sourceCompressed.addLongIdentifier("input-compressed");
315    parser.addArgument(sourceCompressed);
316
317    targetLDIFBasePath = new FileArgument('o', "targetLDIFBasePath", false, 1,
318         null, INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_TARGET_LDIF_BASE.get(), false,
319         true, true, false);
320    targetLDIFBasePath.addLongIdentifier("outputLDIFBasePath");
321    targetLDIFBasePath.addLongIdentifier("target-ldif-base-path");
322    targetLDIFBasePath.addLongIdentifier("output-ldif-base-path");
323    parser.addArgument(targetLDIFBasePath);
324
325    compressTarget = new BooleanArgument('c', "compressTarget",
326         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_COMPRESS_TARGET.get());
327    compressTarget.addLongIdentifier("compressOutput");
328    compressTarget.addLongIdentifier("compress");
329    compressTarget.addLongIdentifier("compress-target");
330    compressTarget.addLongIdentifier("compress-output");
331    parser.addArgument(compressTarget);
332
333    splitBaseDN = new DNArgument('b', "splitBaseDN", true, 1, null,
334         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SPLIT_BASE_DN.get());
335    splitBaseDN.addLongIdentifier("baseDN");
336    splitBaseDN.addLongIdentifier("split-base-dn");
337    splitBaseDN.addLongIdentifier("base-dn");
338    parser.addArgument(splitBaseDN);
339
340    addEntriesOutsideSplitBaseDNToAllSets = new BooleanArgument(null,
341         "addEntriesOutsideSplitBaseDNToAllSets", 1,
342         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_ALL_SETS.get());
343    addEntriesOutsideSplitBaseDNToAllSets.addLongIdentifier(
344         "add-entries-outside-split-base-dn-to-all-sets");
345    parser.addArgument(addEntriesOutsideSplitBaseDNToAllSets);
346
347    addEntriesOutsideSplitBaseDNToDedicatedSet = new BooleanArgument(null,
348         "addEntriesOutsideSplitBaseDNToDedicatedSet", 1,
349         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_DEDICATED_SET.get());
350    addEntriesOutsideSplitBaseDNToDedicatedSet.addLongIdentifier(
351         "add-entries-outside-split-base-dn-to-dedicated-set");
352    parser.addArgument(addEntriesOutsideSplitBaseDNToDedicatedSet);
353
354    schemaPath = new FileArgument(null, "schemaPath", false, 0, null,
355         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SCHEMA_PATH.get(), true, false, false,
356         false);
357    schemaPath.addLongIdentifier("schemaFile");
358    schemaPath.addLongIdentifier("schemaDirectory");
359    schemaPath.addLongIdentifier("schema-path");
360    schemaPath.addLongIdentifier("schema-file");
361    schemaPath.addLongIdentifier("schema-directory");
362    parser.addArgument(schemaPath);
363
364    numThreads = new IntegerArgument('t', "numThreads", false, 1, null,
365         INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_NUM_THREADS.get(), 1,
366         Integer.MAX_VALUE, 1);
367    numThreads.addLongIdentifier("num-threads");
368    parser.addArgument(numThreads);
369
370
371    // Add the subcommand used to split entries using a hash on the RDN.
372    final ArgumentParser splitUsingHashOnRDNParser = new ArgumentParser(
373         "split-using-hash-on-rdn", INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get());
374
375    splitUsingHashOnRDNNumSets = new IntegerArgument(null, "numSets", true, 1,
376         null, INFO_SPLIT_LDIF_SC_HASH_ON_RDN_ARG_DESC_NUM_SETS.get(), 2,
377         Integer.MAX_VALUE);
378    splitUsingHashOnRDNNumSets.addLongIdentifier("num-sets");
379    splitUsingHashOnRDNParser.addArgument(splitUsingHashOnRDNNumSets);
380
381    final LinkedHashMap<String[],String> splitUsingHashOnRDNExamples =
382         new LinkedHashMap<String[],String>(1);
383    splitUsingHashOnRDNExamples.put(
384         new String[]
385         {
386           "split-using-hash-on-rdn",
387           "--sourceLDIF", "whole.ldif",
388           "--targetLDIFBasePath", "split.ldif",
389           "--splitBaseDN", "ou=People,dc=example,dc=com",
390           "--numSets", "4",
391           "--schemaPath", "config/schema",
392           "--addEntriesOutsideSplitBaseDNToAllSets"
393         },
394         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_EXAMPLE.get());
395
396    splitUsingHashOnRDN = new SubCommand("split-using-hash-on-rdn",
397         INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get(), splitUsingHashOnRDNParser,
398         splitUsingHashOnRDNExamples);
399    splitUsingHashOnRDN.addName("hash-on-rdn");
400
401    parser.addSubCommand(splitUsingHashOnRDN);
402
403
404    // Add the subcommand used to split entries using a hash on a specified
405    // attribute.
406    final ArgumentParser splitUsingHashOnAttributeParser = new ArgumentParser(
407         "split-using-hash-on-attribute",
408         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get());
409
410    splitUsingHashOnAttributeAttributeName = new StringArgument(null,
411         "attributeName", true, 1, "{attr}",
412         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ATTR_NAME.get());
413    splitUsingHashOnAttributeAttributeName.addLongIdentifier("attribute-name");
414    splitUsingHashOnAttributeParser.addArgument(
415         splitUsingHashOnAttributeAttributeName);
416
417    splitUsingHashOnAttributeNumSets = new IntegerArgument(null, "numSets",
418         true, 1, null, INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_NUM_SETS.get(),
419         2, Integer.MAX_VALUE);
420    splitUsingHashOnAttributeNumSets.addLongIdentifier("num-sets");
421    splitUsingHashOnAttributeParser.addArgument(
422         splitUsingHashOnAttributeNumSets);
423
424    splitUsingHashOnAttributeUseAllValues = new BooleanArgument(null,
425         "useAllValues", 1,
426         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ALL_VALUES.get());
427    splitUsingHashOnAttributeUseAllValues.addLongIdentifier("use-all-values");
428    splitUsingHashOnAttributeParser.addArgument(
429         splitUsingHashOnAttributeUseAllValues);
430
431    splitUsingHashOnAttributeAssumeFlatDIT = new BooleanArgument(null,
432         "assumeFlatDIT", 1,
433         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ASSUME_FLAT_DIT.get());
434    splitUsingHashOnAttributeAssumeFlatDIT.addLongIdentifier("assume-flat-dit");
435    splitUsingHashOnAttributeParser.addArgument(
436         splitUsingHashOnAttributeAssumeFlatDIT);
437
438    final LinkedHashMap<String[],String> splitUsingHashOnAttributeExamples =
439         new LinkedHashMap<String[],String>(1);
440    splitUsingHashOnAttributeExamples.put(
441         new String[]
442         {
443           "split-using-hash-on-attribute",
444           "--sourceLDIF", "whole.ldif",
445           "--targetLDIFBasePath", "split.ldif",
446           "--splitBaseDN", "ou=People,dc=example,dc=com",
447           "--attributeName", "uid",
448           "--numSets", "4",
449           "--schemaPath", "config/schema",
450           "--addEntriesOutsideSplitBaseDNToAllSets"
451         },
452         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_EXAMPLE.get());
453
454    splitUsingHashOnAttribute = new SubCommand("split-using-hash-on-attribute",
455         INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get(),
456         splitUsingHashOnAttributeParser, splitUsingHashOnAttributeExamples);
457    splitUsingHashOnAttribute.addName("hash-on-attribute");
458
459    parser.addSubCommand(splitUsingHashOnAttribute);
460
461
462    // Add the subcommand used to split entries by selecting the set with the
463    // fewest entries.
464    final ArgumentParser splitUsingFewestEntriesParser = new ArgumentParser(
465         "split-using-fewest-entries",
466         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get());
467
468    splitUsingFewestEntriesNumSets = new IntegerArgument(null, "numSets",
469         true, 1, null,
470         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_NUM_SETS.get(),
471         2, Integer.MAX_VALUE);
472    splitUsingFewestEntriesNumSets.addLongIdentifier("num-sets");
473    splitUsingFewestEntriesParser.addArgument(splitUsingFewestEntriesNumSets);
474
475    splitUsingFewestEntriesAssumeFlatDIT = new BooleanArgument(null,
476         "assumeFlatDIT", 1,
477         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_ASSUME_FLAT_DIT.get());
478    splitUsingFewestEntriesAssumeFlatDIT.addLongIdentifier("assume-flat-dit");
479    splitUsingFewestEntriesParser.addArgument(
480         splitUsingFewestEntriesAssumeFlatDIT);
481
482    final LinkedHashMap<String[],String> splitUsingFewestEntriesExamples =
483         new LinkedHashMap<String[],String>(1);
484    splitUsingFewestEntriesExamples.put(
485         new String[]
486         {
487           "split-using-fewest-entries",
488           "--sourceLDIF", "whole.ldif",
489           "--targetLDIFBasePath", "split.ldif",
490           "--splitBaseDN", "ou=People,dc=example,dc=com",
491           "--numSets", "4",
492           "--schemaPath", "config/schema",
493           "--addEntriesOutsideSplitBaseDNToAllSets"
494         },
495         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_EXAMPLE.get());
496
497    splitUsingFewestEntries = new SubCommand("split-using-fewest-entries",
498         INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get(),
499         splitUsingFewestEntriesParser, splitUsingFewestEntriesExamples);
500    splitUsingFewestEntries.addName("fewest-entries");
501
502    parser.addSubCommand(splitUsingFewestEntries);
503
504
505    // Add the subcommand used to split entries by selecting the set based on a
506    // filter.
507    final ArgumentParser splitUsingFilterParser = new ArgumentParser(
508         "split-using-filter", INFO_SPLIT_LDIF_SC_FILTER_DESC.get());
509
510    splitUsingFilterFilter = new FilterArgument(null, "filter", true, 0, null,
511         INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_FILTER.get());
512    splitUsingFilterParser.addArgument(splitUsingFilterFilter);
513
514    splitUsingFilterAssumeFlatDIT = new BooleanArgument(null, "assumeFlatDIT",
515         1, INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_ASSUME_FLAT_DIT.get());
516    splitUsingFilterAssumeFlatDIT.addLongIdentifier("assume-flat-dit");
517    splitUsingFilterParser.addArgument(splitUsingFilterAssumeFlatDIT);
518
519    final LinkedHashMap<String[],String> splitUsingFilterExamples =
520         new LinkedHashMap<String[],String>(1);
521    splitUsingFilterExamples.put(
522         new String[]
523         {
524           "split-using-filter",
525           "--sourceLDIF", "whole.ldif",
526           "--targetLDIFBasePath", "split.ldif",
527           "--splitBaseDN", "ou=People,dc=example,dc=com",
528           "--filter", "(timeZone=Eastern)",
529           "--filter", "(timeZone=Central)",
530           "--filter", "(timeZone=Mountain)",
531           "--filter", "(timeZone=Pacific)",
532           "--schemaPath", "config/schema",
533           "--addEntriesOutsideSplitBaseDNToAllSets"
534         },
535         INFO_SPLIT_LDIF_SC_FILTER_EXAMPLE.get());
536
537    splitUsingFilter = new SubCommand("split-using-filter",
538         INFO_SPLIT_LDIF_SC_FILTER_DESC.get(),
539         splitUsingFilterParser, splitUsingFilterExamples);
540    splitUsingFilter.addName("filter");
541
542    parser.addSubCommand(splitUsingFilter);
543  }
544
545
546
547  /**
548   * {@inheritDoc}
549   */
550  @Override()
551  public void doExtendedArgumentValidation()
552         throws ArgumentException
553  {
554    // If multiple sourceLDIF values were provided, then a target LDIF base path
555    // must have been given.
556    final List<File> sourceLDIFValues = sourceLDIF.getValues();
557    if (sourceLDIFValues.size() > 1)
558    {
559      if (! targetLDIFBasePath.isPresent())
560      {
561        throw new ArgumentException(ERR_SPLIT_LDIF_NO_TARGET_BASE_PATH.get(
562             sourceLDIF.getIdentifierString(),
563             targetLDIFBasePath.getIdentifierString()));
564      }
565    }
566
567
568    // If the split-using-filter subcommand was provided, then at least two
569    // filters must have been provided, and none of the filters can be logically
570    // equivalent to any of the others.
571    if (splitUsingFilter.isPresent())
572    {
573      final List<Filter> filterList = splitUsingFilterFilter.getValues();
574      final Set<Filter> filterSet =
575           new LinkedHashSet<Filter>(filterList.size());
576      for (final Filter f : filterList)
577      {
578        if (filterSet.contains(f))
579        {
580          throw new ArgumentException(ERR_SPLIT_LDIF_NON_UNIQUE_FILTER.get(
581               splitUsingFilterFilter.getIdentifierString(), f.toString()));
582        }
583        else
584        {
585          filterSet.add(f);
586        }
587      }
588
589      if (filterSet.size() < 2)
590      {
591        throw new ArgumentException(ERR_SPLIT_LDIF_NOT_ENOUGH_FILTERS.get(
592             splitUsingFilter.getPrimaryName(),
593             splitUsingFilterFilter.getIdentifierString()));
594      }
595    }
596  }
597
598
599
600  /**
601   * {@inheritDoc}
602   */
603  @Override()
604  public ResultCode doToolProcessing()
605  {
606    // Get the schema to use during processing.
607    final Schema schema;
608    try
609    {
610      schema = getSchema();
611    }
612    catch (final LDAPException le)
613    {
614      wrapErr(0, MAX_OUTPUT_LINE_LENGTH, le.getMessage());
615      return le.getResultCode();
616    }
617
618
619    // Figure out which subcommand was selected, and create the appropriate
620    // translator to use to perform the processing.
621    final SplitLDIFTranslator translator;
622    if (splitUsingHashOnRDN.isPresent())
623    {
624      translator = new SplitLDIFRDNHashTranslator(splitBaseDN.getValue(),
625           splitUsingHashOnRDNNumSets.getValue(),
626           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
627           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
628    }
629    else if (splitUsingHashOnAttribute.isPresent())
630    {
631      translator = new SplitLDIFAttributeHashTranslator(splitBaseDN.getValue(),
632           splitUsingHashOnAttributeNumSets.getValue(),
633           splitUsingHashOnAttributeAttributeName.getValue(),
634           splitUsingHashOnAttributeUseAllValues.isPresent(),
635           splitUsingHashOnAttributeAssumeFlatDIT.isPresent(),
636           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
637           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
638    }
639    else if (splitUsingFewestEntries.isPresent())
640    {
641      translator = new SplitLDIFFewestEntriesTranslator(splitBaseDN.getValue(),
642           splitUsingFewestEntriesNumSets.getValue(),
643           splitUsingFewestEntriesAssumeFlatDIT.isPresent(),
644           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
645           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
646    }
647    else if (splitUsingFilter.isPresent())
648    {
649      final List<Filter> filterList = splitUsingFilterFilter.getValues();
650      final LinkedHashSet<Filter> filterSet =
651           new LinkedHashSet<Filter>(filterList.size());
652      for (final Filter f : filterList)
653      {
654        filterSet.add(f);
655      }
656
657      translator = new SplitLDIFFilterTranslator(splitBaseDN.getValue(),
658           schema, filterSet, splitUsingFilterAssumeFlatDIT.isPresent(),
659           addEntriesOutsideSplitBaseDNToAllSets.isPresent(),
660           addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent());
661    }
662    else
663    {
664      // This should never happen.
665      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
666           ERR_SPLIT_LDIF_CANNOT_DETERMINE_SPLIT_ALGORITHM.get(
667                splitUsingHashOnRDN.getPrimaryName() + ", " +
668                splitUsingHashOnAttribute.getPrimaryName() + ", " +
669                splitUsingFewestEntries.getPrimaryName() + ", " +
670                splitUsingFilter.getPrimaryName()));
671      return ResultCode.PARAM_ERROR;
672    }
673
674
675    // Create the LDIF reader.
676    final LDIFReader ldifReader;
677    try
678    {
679      InputStream inputStream;
680      if (sourceLDIF.isPresent())
681      {
682        final List<File> sourceFiles = sourceLDIF.getValues();
683        final ArrayList<InputStream> fileInputStreams =
684             new ArrayList<InputStream>(2*sourceFiles.size());
685        for (final File f : sourceFiles)
686        {
687          if (! fileInputStreams.isEmpty())
688          {
689            // Go ahead and ensure that there are at least new end-of-line
690            // markers between each file.  Otherwise, it's possible for entries
691            // to run together.
692            final byte[] doubleEOL = new byte[StaticUtils.EOL_BYTES.length * 2];
693            System.arraycopy(StaticUtils.EOL_BYTES, 0, doubleEOL, 0,
694                 StaticUtils.EOL_BYTES.length);
695            System.arraycopy(StaticUtils.EOL_BYTES, 0, doubleEOL,
696                 StaticUtils.EOL_BYTES.length, StaticUtils.EOL_BYTES.length);
697            fileInputStreams.add(new ByteArrayInputStream(doubleEOL));
698          }
699          fileInputStreams.add(new FileInputStream(f));
700        }
701
702        if (fileInputStreams.size() == 1)
703        {
704          inputStream = fileInputStreams.get(0);
705        }
706        else
707        {
708          inputStream = new AggregateInputStream(fileInputStreams);
709        }
710      }
711      else
712      {
713        inputStream = System.in;
714      }
715
716      if (sourceCompressed.isPresent())
717      {
718        inputStream = new GZIPInputStream(inputStream);
719      }
720
721      ldifReader = new LDIFReader(inputStream, numThreads.getValue(),
722           translator);
723      if (schema != null)
724      {
725        ldifReader.setSchema(schema);
726      }
727    }
728    catch (final Exception e)
729    {
730      Debug.debugException(e);
731      wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
732           ERR_SPLIT_LDIF_ERROR_CREATING_LDIF_READER.get(
733                StaticUtils.getExceptionMessage(e)));
734      return ResultCode.LOCAL_ERROR;
735    }
736
737
738    // Iterate through and process all of the entries.
739    ResultCode resultCode = ResultCode.SUCCESS;
740    final LinkedHashMap<String,OutputStream> outputStreams =
741         new LinkedHashMap<String,OutputStream>(10);
742    try
743    {
744      final AtomicLong entriesRead = new AtomicLong(0L);
745      final AtomicLong entriesExcluded = new AtomicLong(0L);
746      final TreeMap<String,AtomicLong> fileCounts =
747           new TreeMap<String,AtomicLong>();
748
749readLoop:
750      while (true)
751      {
752        final SplitLDIFEntry entry;
753        try
754        {
755          entry = (SplitLDIFEntry) ldifReader.readEntry();
756        }
757        catch (final LDIFException le)
758        {
759          Debug.debugException(le);
760          resultCode = ResultCode.LOCAL_ERROR;
761
762          final File f = getOutputFile(SplitLDIFEntry.SET_NAME_ERRORS);
763          OutputStream s = outputStreams.get(SplitLDIFEntry.SET_NAME_ERRORS);
764          if (s == null)
765          {
766            try
767            {
768              s = new FileOutputStream(f);
769              if (compressTarget.isPresent())
770              {
771                s = new GZIPOutputStream(s);
772              }
773
774              outputStreams.put(SplitLDIFEntry.SET_NAME_ERRORS, s);
775              fileCounts.put(SplitLDIFEntry.SET_NAME_ERRORS,
776                   new AtomicLong(0L));
777            }
778            catch (final Exception e)
779            {
780              Debug.debugException(e);
781              resultCode = ResultCode.LOCAL_ERROR;
782              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
783                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
784                        f.getAbsolutePath(),
785                        StaticUtils.getExceptionMessage(e)));
786              break readLoop;
787            }
788          }
789
790          final ByteStringBuffer buffer = new ByteStringBuffer();
791          buffer.append("# ");
792          buffer.append(le.getMessage());
793          buffer.append(StaticUtils.EOL_BYTES);
794
795          final List<String> dataLines = le.getDataLines();
796          if (dataLines != null)
797          {
798            for (final String dataLine : dataLines)
799            {
800              buffer.append(dataLine);
801              buffer.append(StaticUtils.EOL_BYTES);
802            }
803          }
804
805          buffer.append(StaticUtils.EOL_BYTES);
806
807          try
808          {
809            s.write(buffer.toByteArray());
810          }
811          catch (final Exception e)
812          {
813              Debug.debugException(e);
814              resultCode = ResultCode.LOCAL_ERROR;
815              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
816                   ERR_SPLIT_LDIF_ERROR_WRITING_ERROR_TO_FILE.get(
817                        le.getMessage(), f.getAbsolutePath(),
818                        StaticUtils.getExceptionMessage(e)));
819              break readLoop;
820          }
821
822          if (le.mayContinueReading())
823          {
824            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
825                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_RECOVERABLE.get(
826                      StaticUtils.getExceptionMessage(le)));
827            continue;
828          }
829          else
830          {
831            wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
832                 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_UNRECOVERABLE.get(
833                      StaticUtils.getExceptionMessage(le)));
834            break;
835          }
836        }
837        catch (final IOException ioe)
838        {
839          Debug.debugException(ioe);
840          resultCode = ResultCode.LOCAL_ERROR;
841          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
842               ERR_SPLIT_LDIF_IO_READ_ERROR.get(
843                    StaticUtils.getExceptionMessage(ioe)));
844          break;
845        }
846        catch (final Exception e)
847        {
848          Debug.debugException(e);
849          resultCode = ResultCode.LOCAL_ERROR;
850          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
851               ERR_SPLIT_LDIF_UNEXPECTED_READ_ERROR.get(
852                    StaticUtils.getExceptionMessage(e)));
853          break;
854        }
855
856        if (entry == null)
857        {
858          break;
859        }
860
861        final long readCount = entriesRead.incrementAndGet();
862        if ((readCount % 1000L) == 0)
863        {
864          // Even though we aren't done with this entry yet, we'll go ahead and
865          // log a progress message now because it's easier to do that now than
866          // to ensure that it's handled properly through all possible error
867          // conditions that need to be handled below.
868          wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
869               INFO_SPLIT_LDIF_PROGRESS.get(readCount));
870        }
871
872
873        // Get the set(s) to which the entry should be written.  If this is
874        // null (which could be the case as a result of a race condition when
875        // using multiple threads where processing for a child completes before
876        // processing for its parent, or as a result of a case in which a
877        // child is included without or before its parent), then try to see if
878        // we can get the sets by passing the entry through the translator.
879        Set<String> sets = entry.getSets();
880        byte[] ldifBytes = entry.getLDIFBytes();
881        if (sets == null)
882        {
883          try
884          {
885            sets = translator.translate(entry, 0L).getSets();
886          }
887          catch (final Exception e)
888          {
889            Debug.debugException(e);
890          }
891
892          if (sets == null)
893          {
894            final SplitLDIFEntry errorEntry =  translator.createEntry(entry,
895                 ERR_SPLIT_LDIF_ENTRY_WITHOUT_PARENT.get(
896                      entry.getDN(), splitBaseDN.getStringValue()),
897                 Collections.singleton(SplitLDIFEntry.SET_NAME_ERRORS));
898            ldifBytes = errorEntry.getLDIFBytes();
899            sets = errorEntry.getSets();
900          }
901        }
902
903
904        // If the entry shouldn't be written into any sets, then we don't need
905        // to do anything else.
906        if (sets.isEmpty())
907        {
908          entriesExcluded.incrementAndGet();
909          continue;
910        }
911
912
913        // Write the entry into each of the target sets, creating the output
914        // files if necessary.
915        for (final String set : sets)
916        {
917          if (set.equals(SplitLDIFEntry.SET_NAME_ERRORS))
918          {
919            // This indicates that an error was encountered during processing,
920            // so we'll update the result code to reflect that.
921            resultCode = ResultCode.LOCAL_ERROR;
922          }
923
924          final File f = getOutputFile(set);
925          OutputStream s = outputStreams.get(set);
926          if (s == null)
927          {
928            try
929            {
930              s = new FileOutputStream(f);
931              if (compressTarget.isPresent())
932              {
933                s = new GZIPOutputStream(s);
934              }
935
936              outputStreams.put(set, s);
937              fileCounts.put(set, new AtomicLong(0L));
938            }
939            catch (final Exception e)
940            {
941              Debug.debugException(e);
942              resultCode = ResultCode.LOCAL_ERROR;
943              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
944                   ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get(
945                        f.getAbsolutePath(),
946                        StaticUtils.getExceptionMessage(e)));
947              break readLoop;
948            }
949          }
950
951          try
952          {
953            s.write(ldifBytes);
954          }
955          catch (final Exception e)
956          {
957              Debug.debugException(e);
958              resultCode = ResultCode.LOCAL_ERROR;
959              wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
960                   ERR_SPLIT_LDIF_ERROR_WRITING_TO_FILE.get(
961                        entry.getDN(), f.getAbsolutePath(),
962                        StaticUtils.getExceptionMessage(e)));
963              break readLoop;
964          }
965
966          fileCounts.get(set).incrementAndGet();
967        }
968      }
969
970
971      // Processing is complete.  Summarize the processing that was performed.
972      final long finalReadCount = entriesRead.get();
973      if (finalReadCount > 1000L)
974      {
975        out();
976      }
977
978      wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
979           INFO_SPLIT_LDIF_PROCESSING_COMPLETE.get(finalReadCount));
980
981      final long excludedCount = entriesExcluded.get();
982      if (excludedCount > 0L)
983      {
984        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
985             INFO_SPLIT_LDIF_EXCLUDED_COUNT.get(excludedCount));
986      }
987
988      for (final Map.Entry<String,AtomicLong> e : fileCounts.entrySet())
989      {
990        final File f = getOutputFile(e.getKey());
991        wrapOut(0, MAX_OUTPUT_LINE_LENGTH,
992             INFO_SPLIT_LDIF_COUNT_TO_FILE.get(e.getValue().get(),
993                  f.getName()));
994      }
995    }
996    finally
997    {
998      try
999      {
1000        ldifReader.close();
1001      }
1002      catch (final Exception e)
1003      {
1004        Debug.debugException(e);
1005      }
1006
1007      for (final Map.Entry<String,OutputStream> e : outputStreams.entrySet())
1008      {
1009        try
1010        {
1011          e.getValue().close();
1012        }
1013        catch (final Exception ex)
1014        {
1015          Debug.debugException(ex);
1016          resultCode = ResultCode.LOCAL_ERROR;
1017          wrapErr(0, MAX_OUTPUT_LINE_LENGTH,
1018               ERR_SPLIT_LDIF_ERROR_CLOSING_FILE.get(
1019                    getOutputFile(e.getKey()),
1020                    StaticUtils.getExceptionMessage(ex)));
1021        }
1022      }
1023    }
1024
1025    return resultCode;
1026  }
1027
1028
1029
1030  /**
1031   * Retrieves the schema that should be used for processing.
1032   *
1033   * @return  The schema that was created.
1034   *
1035   * @throws  LDAPException  If a problem is encountered while retrieving the
1036   *                         schema.
1037   */
1038  private Schema getSchema()
1039          throws LDAPException
1040  {
1041    // If any schema paths were specified, then load the schema only from those
1042    // paths.
1043    if (schemaPath.isPresent())
1044    {
1045      final ArrayList<File> schemaFiles = new ArrayList<File>(10);
1046      for (final File path : schemaPath.getValues())
1047      {
1048        if (path.isFile())
1049        {
1050          schemaFiles.add(path);
1051        }
1052        else
1053        {
1054          final TreeMap<String,File> fileMap = new TreeMap<String,File>();
1055          for (final File schemaDirFile : path.listFiles())
1056          {
1057            final String name = schemaDirFile.getName();
1058            if (schemaDirFile.isFile() && name.toLowerCase().endsWith(".ldif"))
1059            {
1060              fileMap.put(name, schemaDirFile);
1061            }
1062          }
1063          schemaFiles.addAll(fileMap.values());
1064        }
1065      }
1066
1067      if (schemaFiles.isEmpty())
1068      {
1069        throw new LDAPException(ResultCode.PARAM_ERROR,
1070             ERR_SPLIT_LDIF_NO_SCHEMA_FILES.get(
1071                  schemaPath.getIdentifierString()));
1072      }
1073      else
1074      {
1075        try
1076        {
1077          return Schema.getSchema(schemaFiles);
1078        }
1079        catch (final Exception e)
1080        {
1081          Debug.debugException(e);
1082          throw new LDAPException(ResultCode.LOCAL_ERROR,
1083               ERR_SPLIT_LDIF_ERROR_LOADING_SCHEMA.get(
1084                    StaticUtils.getExceptionMessage(e)));
1085        }
1086      }
1087    }
1088    else
1089    {
1090      // If the INSTANCE_ROOT environment variable is set and it refers to a
1091      // directory that has a config/schema subdirectory that has one or more
1092      // schema files in it, then read the schema from that directory.
1093      try
1094      {
1095        final String instanceRootStr = System.getenv("INSTANCE_ROOT");
1096        if (instanceRootStr != null)
1097        {
1098          final File instanceRoot = new File(instanceRootStr);
1099          final File configDir = new File(instanceRoot, "config");
1100          final File schemaDir = new File(configDir, "schema");
1101          if (schemaDir.exists())
1102          {
1103            final TreeMap<String,File> fileMap = new TreeMap<String,File>();
1104            for (final File schemaDirFile : schemaDir.listFiles())
1105            {
1106              final String name = schemaDirFile.getName();
1107              if (schemaDirFile.isFile() &&
1108                  name.toLowerCase().endsWith(".ldif"))
1109              {
1110                fileMap.put(name, schemaDirFile);
1111              }
1112            }
1113
1114            if (! fileMap.isEmpty())
1115            {
1116              return Schema.getSchema(new ArrayList<File>(fileMap.values()));
1117            }
1118          }
1119        }
1120      }
1121      catch (final Exception e)
1122      {
1123        Debug.debugException(e);
1124      }
1125    }
1126
1127
1128    // If we've gotten here, then just return null and the tool will try to use
1129    // the default standard schema.
1130    return null;
1131  }
1132
1133
1134
1135  /**
1136   * Retrieves a file object that refers to an output file with the provided
1137   * extension.
1138   *
1139   * @param  extension  The extension to use for the file.
1140   *
1141   * @return  A file object that refers to an output file with the provided
1142   *          extension.
1143   */
1144  private File getOutputFile(final String extension)
1145  {
1146    final File baseFile;
1147    if (targetLDIFBasePath.isPresent())
1148    {
1149      baseFile = targetLDIFBasePath.getValue();
1150    }
1151    else
1152    {
1153      baseFile = sourceLDIF.getValue();
1154    }
1155
1156    return new File(baseFile.getAbsolutePath() + extension);
1157  }
1158
1159
1160
1161  /**
1162   * {@inheritDoc}
1163   */
1164  @Override()
1165  public LinkedHashMap<String[],String> getExampleUsages()
1166  {
1167    final LinkedHashMap<String[],String> exampleMap =
1168         new LinkedHashMap<String[],String>(4);
1169
1170    for (final Map.Entry<String[],String> e :
1171         splitUsingHashOnRDN.getExampleUsages().entrySet())
1172    {
1173      exampleMap.put(e.getKey(), e.getValue());
1174    }
1175
1176    for (final Map.Entry<String[],String> e :
1177         splitUsingHashOnAttribute.getExampleUsages().entrySet())
1178    {
1179      exampleMap.put(e.getKey(), e.getValue());
1180    }
1181
1182    for (final Map.Entry<String[],String> e :
1183         splitUsingFewestEntries.getExampleUsages().entrySet())
1184    {
1185      exampleMap.put(e.getKey(), e.getValue());
1186    }
1187
1188    for (final Map.Entry<String[],String> e :
1189         splitUsingFilter.getExampleUsages().entrySet())
1190    {
1191      exampleMap.put(e.getKey(), e.getValue());
1192    }
1193
1194    return exampleMap;
1195  }
1196}