001/* 002 * Copyright 2016-2017 Ping Identity Corporation 003 * All Rights Reserved. 004 */ 005/* 006 * Copyright (C) 2016-2017 Ping Identity Corporation 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (GPLv2 only) 010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 011 * as published by the Free Software Foundation. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Public License for more details. 017 * 018 * You should have received a copy of the GNU General Public License 019 * along with this program; if not, see <http://www.gnu.org/licenses>. 020 */ 021package com.unboundid.ldap.sdk.unboundidds.tools; 022 023 024 025import java.io.ByteArrayInputStream; 026import java.io.File; 027import java.io.FileInputStream; 028import java.io.FileOutputStream; 029import java.io.InputStream; 030import java.io.IOException; 031import java.io.OutputStream; 032import java.util.ArrayList; 033import java.util.Collections; 034import java.util.LinkedHashMap; 035import java.util.LinkedHashSet; 036import java.util.List; 037import java.util.Map; 038import java.util.Set; 039import java.util.TreeMap; 040import java.util.concurrent.atomic.AtomicLong; 041import java.util.zip.GZIPInputStream; 042import java.util.zip.GZIPOutputStream; 043 044import com.unboundid.ldap.sdk.Filter; 045import com.unboundid.ldap.sdk.LDAPException; 046import com.unboundid.ldap.sdk.ResultCode; 047import com.unboundid.ldap.sdk.Version; 048import com.unboundid.ldap.sdk.schema.Schema; 049import com.unboundid.ldif.LDIFException; 050import com.unboundid.ldif.LDIFReader; 051import com.unboundid.util.ByteStringBuffer; 052import com.unboundid.util.CommandLineTool; 053import com.unboundid.util.AggregateInputStream; 054import 
com.unboundid.util.Debug; 055import com.unboundid.util.StaticUtils; 056import com.unboundid.util.ThreadSafety; 057import com.unboundid.util.ThreadSafetyLevel; 058import com.unboundid.util.args.ArgumentException; 059import com.unboundid.util.args.ArgumentParser; 060import com.unboundid.util.args.BooleanArgument; 061import com.unboundid.util.args.DNArgument; 062import com.unboundid.util.args.FileArgument; 063import com.unboundid.util.args.FilterArgument; 064import com.unboundid.util.args.IntegerArgument; 065import com.unboundid.util.args.SubCommand; 066import com.unboundid.util.args.StringArgument; 067 068import static com.unboundid.ldap.sdk.unboundidds.tools.ToolMessages.*; 069 070 071 072/** 073 * This class provides a command-line tool that can be used to split an LDIF 074 * file below a specified base DN. This can be used to help initialize an 075 * entry-balancing deployment for use with the Directory Proxy Server. 076 * <BR> 077 * <BLOCKQUOTE> 078 * <B>NOTE:</B> This class, and other classes within the 079 * {@code com.unboundid.ldap.sdk.unboundidds} package structure, are only 080 * supported for use against Ping Identity, UnboundID, and Alcatel-Lucent 8661 081 * server products. These classes provide support for proprietary 082 * functionality or for external specifications that are not considered stable 083 * or mature enough to be guaranteed to work in an interoperable way with 084 * other types of LDAP servers. 085 * </BLOCKQUOTE> 086 * <BR> 087 * It supports a number of algorithms for determining how to split the data, 088 * including: 089 * <UL> 090 * <LI> 091 * split-using-hash-on-rdn -- The tool will compute a digest of the DN 092 * component that is immediately below the split base DN, and will use a 093 * modulus to select a backend set for a given entry. 
Since the split is 094 * based purely on computation involving the DN, the there is no need for 095 * caching to ensure that children are placed in the same sets as their 096 * parent, which allows it to run effectively with a small memory footprint. 097 * </LI> 098 * <LI> 099 * split-using-hash-on-attribute -- The tool will compute a digest of the 100 * value(s) of a specified attribute, and will use a modulus to select a 101 * backend set for a given entry. This hash will only be computed for 102 * entries immediately below the split base DN, and a cache will be used to 103 * ensure that entries more than one level below the split base DN are 104 * placed in the same backend set as their parent. 105 * </LI> 106 * <LI> 107 * split-using-fewest-entries -- When examining an entry immediately below 108 * the split base DN, the tool will place that entry in the set that has the 109 * fewest entries. For flat DITs in which entries only exist one level 110 * below the split base DN, this will effectively ensure a round-robin 111 * distribution. But for cases in which there are branches of varying sizes 112 * below the split base DN, this can help ensure that entries are more 113 * evenly distributed across backend sets. A cache will be used to ensure 114 * that entries more than one level below the split base DN are placed in 115 * the same backend set as their parent. 116 * </LI> 117 * <LI> 118 * split-using-filter -- When examining an entry immediately below the split 119 * base DN, a series of filters will be evaluated against that entry, which 120 * each filter associated with a specific backend set. If an entry doesn't 121 * match any of the provided filters, an RDN hash can be used to select the 122 * set. A cache will be used to ensure that entries more than one level 123 * below the split base DN are placed in the same backend set as their 124 * parent. 
125 * </LI> 126 * </UL> 127 */ 128@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE) 129public final class SplitLDIF 130 extends CommandLineTool 131{ 132 /** 133 * The maximum length of any message to write to standard output or standard 134 * error. 135 */ 136 private static final int MAX_OUTPUT_LINE_LENGTH = 137 StaticUtils.TERMINAL_WIDTH_COLUMNS - 1; 138 139 140 141 // The global arguments used by this tool. 142 private BooleanArgument addEntriesOutsideSplitBaseDNToAllSets = null; 143 private BooleanArgument addEntriesOutsideSplitBaseDNToDedicatedSet = null; 144 private BooleanArgument compressTarget = null; 145 private BooleanArgument sourceCompressed = null; 146 private DNArgument splitBaseDN = null; 147 private FileArgument schemaPath = null; 148 private FileArgument sourceLDIF = null; 149 private FileArgument targetLDIFBasePath = null; 150 private IntegerArgument numThreads = null; 151 152 // The arguments used to split using a hash of the RDN. 153 private IntegerArgument splitUsingHashOnRDNNumSets = null; 154 private SubCommand splitUsingHashOnRDN = null; 155 156 // The arguments used to split using a hash on a specified attribute. 157 private BooleanArgument splitUsingHashOnAttributeAssumeFlatDIT = null; 158 private BooleanArgument splitUsingHashOnAttributeUseAllValues = null; 159 private IntegerArgument splitUsingHashOnAttributeNumSets = null; 160 private StringArgument splitUsingHashOnAttributeAttributeName = null; 161 private SubCommand splitUsingHashOnAttribute = null; 162 163 // The arguments used to choose the set with the fewest entries. 164 private BooleanArgument splitUsingFewestEntriesAssumeFlatDIT = null; 165 private IntegerArgument splitUsingFewestEntriesNumSets = null; 166 private SubCommand splitUsingFewestEntries = null; 167 168 // The arguments used to choose the set using a provided set of filters. 
169 private BooleanArgument splitUsingFilterAssumeFlatDIT = null; 170 private FilterArgument splitUsingFilterFilter = null; 171 private SubCommand splitUsingFilter = null; 172 173 174 175 /** 176 * Runs the tool with the provided set of command-line arguments. 177 * 178 * @param args The command-line arguments provided to this tool. 179 */ 180 public static void main(final String... args) 181 { 182 final ResultCode resultCode = main(System.out, System.err, args); 183 if (resultCode != ResultCode.SUCCESS) 184 { 185 System.exit(resultCode.intValue()); 186 } 187 } 188 189 190 191 /** 192 * Runs the tool with the provided set of command-line arguments. 193 * 194 * @param out The output stream used for standard output. It may be 195 * {@code null} if standard output should be suppressed. 196 * @param err The output stream used for standard error. It may be 197 * {@code null} if standard error should be suppressed. 198 * @param args The command-line arguments provided to this tool. 199 * 200 * @return A result code with information about the processing performed. 201 * Any result code other than {@link ResultCode#SUCCESS} indicates 202 * that an error occurred. 203 */ 204 public static ResultCode main(final OutputStream out, final OutputStream err, 205 final String... args) 206 { 207 final SplitLDIF tool = new SplitLDIF(out, err); 208 return tool.runTool(args); 209 } 210 211 212 213 /** 214 * Creates a new instance of this tool with the provided information. 215 * 216 * @param out The output stream used for standard output. It may be 217 * {@code null} if standard output should be suppressed. 218 * @param err The output stream used for standard error. It may be 219 * {@code null} if standard error should be suppressed. 
220 */ 221 public SplitLDIF(final OutputStream out, final OutputStream err) 222 { 223 super(out, err); 224 } 225 226 227 228 /** 229 * {@inheritDoc} 230 */ 231 @Override() 232 public String getToolName() 233 { 234 return "split-ldif"; 235 } 236 237 238 239 /** 240 * {@inheritDoc} 241 */ 242 @Override() 243 public String getToolDescription() 244 { 245 return INFO_SPLIT_LDIF_TOOL_DESCRIPTION.get(); 246 } 247 248 249 250 /** 251 * {@inheritDoc} 252 */ 253 @Override() 254 public String getToolVersion() 255 { 256 return Version.NUMERIC_VERSION_STRING; 257 } 258 259 260 261 /** 262 * {@inheritDoc} 263 */ 264 @Override() 265 public boolean supportsInteractiveMode() 266 { 267 return true; 268 } 269 270 271 272 /** 273 * {@inheritDoc} 274 */ 275 @Override() 276 public boolean defaultsToInteractiveMode() 277 { 278 return true; 279 } 280 281 282 283 /** 284 * {@inheritDoc} 285 */ 286 @Override() 287 public boolean supportsPropertiesFile() 288 { 289 return true; 290 } 291 292 293 294 /** 295 * {@inheritDoc} 296 */ 297 @Override() 298 public void addToolArguments(final ArgumentParser parser) 299 throws ArgumentException 300 { 301 // Add the global arguments. 
302 sourceLDIF = new FileArgument('l', "sourceLDIF", true, 0, null, 303 INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_LDIF.get(), true, false, true, 304 false); 305 sourceLDIF.addLongIdentifier("inputLDIF"); 306 sourceLDIF.addLongIdentifier("source-ldif"); 307 sourceLDIF.addLongIdentifier("input-ldif"); 308 parser.addArgument(sourceLDIF); 309 310 sourceCompressed = new BooleanArgument('C', "sourceCompressed", 311 INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SOURCE_COMPRESSED.get()); 312 sourceCompressed.addLongIdentifier("inputCompressed"); 313 sourceCompressed.addLongIdentifier("source-compressed"); 314 sourceCompressed.addLongIdentifier("input-compressed"); 315 parser.addArgument(sourceCompressed); 316 317 targetLDIFBasePath = new FileArgument('o', "targetLDIFBasePath", false, 1, 318 null, INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_TARGET_LDIF_BASE.get(), false, 319 true, true, false); 320 targetLDIFBasePath.addLongIdentifier("outputLDIFBasePath"); 321 targetLDIFBasePath.addLongIdentifier("target-ldif-base-path"); 322 targetLDIFBasePath.addLongIdentifier("output-ldif-base-path"); 323 parser.addArgument(targetLDIFBasePath); 324 325 compressTarget = new BooleanArgument('c', "compressTarget", 326 INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_COMPRESS_TARGET.get()); 327 compressTarget.addLongIdentifier("compressOutput"); 328 compressTarget.addLongIdentifier("compress"); 329 compressTarget.addLongIdentifier("compress-target"); 330 compressTarget.addLongIdentifier("compress-output"); 331 parser.addArgument(compressTarget); 332 333 splitBaseDN = new DNArgument('b', "splitBaseDN", true, 1, null, 334 INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SPLIT_BASE_DN.get()); 335 splitBaseDN.addLongIdentifier("baseDN"); 336 splitBaseDN.addLongIdentifier("split-base-dn"); 337 splitBaseDN.addLongIdentifier("base-dn"); 338 parser.addArgument(splitBaseDN); 339 340 addEntriesOutsideSplitBaseDNToAllSets = new BooleanArgument(null, 341 "addEntriesOutsideSplitBaseDNToAllSets", 1, 342 INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_ALL_SETS.get()); 343 
addEntriesOutsideSplitBaseDNToAllSets.addLongIdentifier( 344 "add-entries-outside-split-base-dn-to-all-sets"); 345 parser.addArgument(addEntriesOutsideSplitBaseDNToAllSets); 346 347 addEntriesOutsideSplitBaseDNToDedicatedSet = new BooleanArgument(null, 348 "addEntriesOutsideSplitBaseDNToDedicatedSet", 1, 349 INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_OUTSIDE_TO_DEDICATED_SET.get()); 350 addEntriesOutsideSplitBaseDNToDedicatedSet.addLongIdentifier( 351 "add-entries-outside-split-base-dn-to-dedicated-set"); 352 parser.addArgument(addEntriesOutsideSplitBaseDNToDedicatedSet); 353 354 schemaPath = new FileArgument(null, "schemaPath", false, 0, null, 355 INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_SCHEMA_PATH.get(), true, false, false, 356 false); 357 schemaPath.addLongIdentifier("schemaFile"); 358 schemaPath.addLongIdentifier("schemaDirectory"); 359 schemaPath.addLongIdentifier("schema-path"); 360 schemaPath.addLongIdentifier("schema-file"); 361 schemaPath.addLongIdentifier("schema-directory"); 362 parser.addArgument(schemaPath); 363 364 numThreads = new IntegerArgument('t', "numThreads", false, 1, null, 365 INFO_SPLIT_LDIF_GLOBAL_ARG_DESC_NUM_THREADS.get(), 1, 366 Integer.MAX_VALUE, 1); 367 numThreads.addLongIdentifier("num-threads"); 368 parser.addArgument(numThreads); 369 370 371 // Add the subcommand used to split entries using a hash on the RDN. 
372 final ArgumentParser splitUsingHashOnRDNParser = new ArgumentParser( 373 "split-using-hash-on-rdn", INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get()); 374 375 splitUsingHashOnRDNNumSets = new IntegerArgument(null, "numSets", true, 1, 376 null, INFO_SPLIT_LDIF_SC_HASH_ON_RDN_ARG_DESC_NUM_SETS.get(), 2, 377 Integer.MAX_VALUE); 378 splitUsingHashOnRDNNumSets.addLongIdentifier("num-sets"); 379 splitUsingHashOnRDNParser.addArgument(splitUsingHashOnRDNNumSets); 380 381 final LinkedHashMap<String[],String> splitUsingHashOnRDNExamples = 382 new LinkedHashMap<String[],String>(1); 383 splitUsingHashOnRDNExamples.put( 384 new String[] 385 { 386 "split-using-hash-on-rdn", 387 "--sourceLDIF", "whole.ldif", 388 "--targetLDIFBasePath", "split.ldif", 389 "--splitBaseDN", "ou=People,dc=example,dc=com", 390 "--numSets", "4", 391 "--schemaPath", "config/schema", 392 "--addEntriesOutsideSplitBaseDNToAllSets" 393 }, 394 INFO_SPLIT_LDIF_SC_HASH_ON_RDN_EXAMPLE.get()); 395 396 splitUsingHashOnRDN = new SubCommand("split-using-hash-on-rdn", 397 INFO_SPLIT_LDIF_SC_HASH_ON_RDN_DESC.get(), splitUsingHashOnRDNParser, 398 splitUsingHashOnRDNExamples); 399 splitUsingHashOnRDN.addName("hash-on-rdn"); 400 401 parser.addSubCommand(splitUsingHashOnRDN); 402 403 404 // Add the subcommand used to split entries using a hash on a specified 405 // attribute. 
406 final ArgumentParser splitUsingHashOnAttributeParser = new ArgumentParser( 407 "split-using-hash-on-attribute", 408 INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get()); 409 410 splitUsingHashOnAttributeAttributeName = new StringArgument(null, 411 "attributeName", true, 1, "{attr}", 412 INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ATTR_NAME.get()); 413 splitUsingHashOnAttributeAttributeName.addLongIdentifier("attribute-name"); 414 splitUsingHashOnAttributeParser.addArgument( 415 splitUsingHashOnAttributeAttributeName); 416 417 splitUsingHashOnAttributeNumSets = new IntegerArgument(null, "numSets", 418 true, 1, null, INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_NUM_SETS.get(), 419 2, Integer.MAX_VALUE); 420 splitUsingHashOnAttributeNumSets.addLongIdentifier("num-sets"); 421 splitUsingHashOnAttributeParser.addArgument( 422 splitUsingHashOnAttributeNumSets); 423 424 splitUsingHashOnAttributeUseAllValues = new BooleanArgument(null, 425 "useAllValues", 1, 426 INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ALL_VALUES.get()); 427 splitUsingHashOnAttributeUseAllValues.addLongIdentifier("use-all-values"); 428 splitUsingHashOnAttributeParser.addArgument( 429 splitUsingHashOnAttributeUseAllValues); 430 431 splitUsingHashOnAttributeAssumeFlatDIT = new BooleanArgument(null, 432 "assumeFlatDIT", 1, 433 INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_ARG_DESC_ASSUME_FLAT_DIT.get()); 434 splitUsingHashOnAttributeAssumeFlatDIT.addLongIdentifier("assume-flat-dit"); 435 splitUsingHashOnAttributeParser.addArgument( 436 splitUsingHashOnAttributeAssumeFlatDIT); 437 438 final LinkedHashMap<String[],String> splitUsingHashOnAttributeExamples = 439 new LinkedHashMap<String[],String>(1); 440 splitUsingHashOnAttributeExamples.put( 441 new String[] 442 { 443 "split-using-hash-on-attribute", 444 "--sourceLDIF", "whole.ldif", 445 "--targetLDIFBasePath", "split.ldif", 446 "--splitBaseDN", "ou=People,dc=example,dc=com", 447 "--attributeName", "uid", 448 "--numSets", "4", 449 "--schemaPath", "config/schema", 450 
"--addEntriesOutsideSplitBaseDNToAllSets" 451 }, 452 INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_EXAMPLE.get()); 453 454 splitUsingHashOnAttribute = new SubCommand("split-using-hash-on-attribute", 455 INFO_SPLIT_LDIF_SC_HASH_ON_ATTR_DESC.get(), 456 splitUsingHashOnAttributeParser, splitUsingHashOnAttributeExamples); 457 splitUsingHashOnAttribute.addName("hash-on-attribute"); 458 459 parser.addSubCommand(splitUsingHashOnAttribute); 460 461 462 // Add the subcommand used to split entries by selecting the set with the 463 // fewest entries. 464 final ArgumentParser splitUsingFewestEntriesParser = new ArgumentParser( 465 "split-using-fewest-entries", 466 INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get()); 467 468 splitUsingFewestEntriesNumSets = new IntegerArgument(null, "numSets", 469 true, 1, null, 470 INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_NUM_SETS.get(), 471 2, Integer.MAX_VALUE); 472 splitUsingFewestEntriesNumSets.addLongIdentifier("num-sets"); 473 splitUsingFewestEntriesParser.addArgument(splitUsingFewestEntriesNumSets); 474 475 splitUsingFewestEntriesAssumeFlatDIT = new BooleanArgument(null, 476 "assumeFlatDIT", 1, 477 INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_ARG_DESC_ASSUME_FLAT_DIT.get()); 478 splitUsingFewestEntriesAssumeFlatDIT.addLongIdentifier("assume-flat-dit"); 479 splitUsingFewestEntriesParser.addArgument( 480 splitUsingFewestEntriesAssumeFlatDIT); 481 482 final LinkedHashMap<String[],String> splitUsingFewestEntriesExamples = 483 new LinkedHashMap<String[],String>(1); 484 splitUsingFewestEntriesExamples.put( 485 new String[] 486 { 487 "split-using-fewest-entries", 488 "--sourceLDIF", "whole.ldif", 489 "--targetLDIFBasePath", "split.ldif", 490 "--splitBaseDN", "ou=People,dc=example,dc=com", 491 "--numSets", "4", 492 "--schemaPath", "config/schema", 493 "--addEntriesOutsideSplitBaseDNToAllSets" 494 }, 495 INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_EXAMPLE.get()); 496 497 splitUsingFewestEntries = new SubCommand("split-using-fewest-entries", 498 
INFO_SPLIT_LDIF_SC_FEWEST_ENTRIES_DESC.get(), 499 splitUsingFewestEntriesParser, splitUsingFewestEntriesExamples); 500 splitUsingFewestEntries.addName("fewest-entries"); 501 502 parser.addSubCommand(splitUsingFewestEntries); 503 504 505 // Add the subcommand used to split entries by selecting the set based on a 506 // filter. 507 final ArgumentParser splitUsingFilterParser = new ArgumentParser( 508 "split-using-filter", INFO_SPLIT_LDIF_SC_FILTER_DESC.get()); 509 510 splitUsingFilterFilter = new FilterArgument(null, "filter", true, 0, null, 511 INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_FILTER.get()); 512 splitUsingFilterParser.addArgument(splitUsingFilterFilter); 513 514 splitUsingFilterAssumeFlatDIT = new BooleanArgument(null, "assumeFlatDIT", 515 1, INFO_SPLIT_LDIF_SC_FILTER_ARG_DESC_ASSUME_FLAT_DIT.get()); 516 splitUsingFilterAssumeFlatDIT.addLongIdentifier("assume-flat-dit"); 517 splitUsingFilterParser.addArgument(splitUsingFilterAssumeFlatDIT); 518 519 final LinkedHashMap<String[],String> splitUsingFilterExamples = 520 new LinkedHashMap<String[],String>(1); 521 splitUsingFilterExamples.put( 522 new String[] 523 { 524 "split-using-filter", 525 "--sourceLDIF", "whole.ldif", 526 "--targetLDIFBasePath", "split.ldif", 527 "--splitBaseDN", "ou=People,dc=example,dc=com", 528 "--filter", "(timeZone=Eastern)", 529 "--filter", "(timeZone=Central)", 530 "--filter", "(timeZone=Mountain)", 531 "--filter", "(timeZone=Pacific)", 532 "--schemaPath", "config/schema", 533 "--addEntriesOutsideSplitBaseDNToAllSets" 534 }, 535 INFO_SPLIT_LDIF_SC_FILTER_EXAMPLE.get()); 536 537 splitUsingFilter = new SubCommand("split-using-filter", 538 INFO_SPLIT_LDIF_SC_FILTER_DESC.get(), 539 splitUsingFilterParser, splitUsingFilterExamples); 540 splitUsingFilter.addName("filter"); 541 542 parser.addSubCommand(splitUsingFilter); 543 } 544 545 546 547 /** 548 * {@inheritDoc} 549 */ 550 @Override() 551 public void doExtendedArgumentValidation() 552 throws ArgumentException 553 { 554 // If multiple 
sourceLDIF values were provided, then a target LDIF base path 555 // must have been given. 556 final List<File> sourceLDIFValues = sourceLDIF.getValues(); 557 if (sourceLDIFValues.size() > 1) 558 { 559 if (! targetLDIFBasePath.isPresent()) 560 { 561 throw new ArgumentException(ERR_SPLIT_LDIF_NO_TARGET_BASE_PATH.get( 562 sourceLDIF.getIdentifierString(), 563 targetLDIFBasePath.getIdentifierString())); 564 } 565 } 566 567 568 // If the split-using-filter subcommand was provided, then at least two 569 // filters must have been provided, and none of the filters can be logically 570 // equivalent to any of the others. 571 if (splitUsingFilter.isPresent()) 572 { 573 final List<Filter> filterList = splitUsingFilterFilter.getValues(); 574 final Set<Filter> filterSet = 575 new LinkedHashSet<Filter>(filterList.size()); 576 for (final Filter f : filterList) 577 { 578 if (filterSet.contains(f)) 579 { 580 throw new ArgumentException(ERR_SPLIT_LDIF_NON_UNIQUE_FILTER.get( 581 splitUsingFilterFilter.getIdentifierString(), f.toString())); 582 } 583 else 584 { 585 filterSet.add(f); 586 } 587 } 588 589 if (filterSet.size() < 2) 590 { 591 throw new ArgumentException(ERR_SPLIT_LDIF_NOT_ENOUGH_FILTERS.get( 592 splitUsingFilter.getPrimaryName(), 593 splitUsingFilterFilter.getIdentifierString())); 594 } 595 } 596 } 597 598 599 600 /** 601 * {@inheritDoc} 602 */ 603 @Override() 604 public ResultCode doToolProcessing() 605 { 606 // Get the schema to use during processing. 607 final Schema schema; 608 try 609 { 610 schema = getSchema(); 611 } 612 catch (final LDAPException le) 613 { 614 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, le.getMessage()); 615 return le.getResultCode(); 616 } 617 618 619 // Figure out which subcommand was selected, and create the appropriate 620 // translator to use to perform the processing. 
621 final SplitLDIFTranslator translator; 622 if (splitUsingHashOnRDN.isPresent()) 623 { 624 translator = new SplitLDIFRDNHashTranslator(splitBaseDN.getValue(), 625 splitUsingHashOnRDNNumSets.getValue(), 626 addEntriesOutsideSplitBaseDNToAllSets.isPresent(), 627 addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent()); 628 } 629 else if (splitUsingHashOnAttribute.isPresent()) 630 { 631 translator = new SplitLDIFAttributeHashTranslator(splitBaseDN.getValue(), 632 splitUsingHashOnAttributeNumSets.getValue(), 633 splitUsingHashOnAttributeAttributeName.getValue(), 634 splitUsingHashOnAttributeUseAllValues.isPresent(), 635 splitUsingHashOnAttributeAssumeFlatDIT.isPresent(), 636 addEntriesOutsideSplitBaseDNToAllSets.isPresent(), 637 addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent()); 638 } 639 else if (splitUsingFewestEntries.isPresent()) 640 { 641 translator = new SplitLDIFFewestEntriesTranslator(splitBaseDN.getValue(), 642 splitUsingFewestEntriesNumSets.getValue(), 643 splitUsingFewestEntriesAssumeFlatDIT.isPresent(), 644 addEntriesOutsideSplitBaseDNToAllSets.isPresent(), 645 addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent()); 646 } 647 else if (splitUsingFilter.isPresent()) 648 { 649 final List<Filter> filterList = splitUsingFilterFilter.getValues(); 650 final LinkedHashSet<Filter> filterSet = 651 new LinkedHashSet<Filter>(filterList.size()); 652 for (final Filter f : filterList) 653 { 654 filterSet.add(f); 655 } 656 657 translator = new SplitLDIFFilterTranslator(splitBaseDN.getValue(), 658 schema, filterSet, splitUsingFilterAssumeFlatDIT.isPresent(), 659 addEntriesOutsideSplitBaseDNToAllSets.isPresent(), 660 addEntriesOutsideSplitBaseDNToDedicatedSet.isPresent()); 661 } 662 else 663 { 664 // This should never happen. 
665 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 666 ERR_SPLIT_LDIF_CANNOT_DETERMINE_SPLIT_ALGORITHM.get( 667 splitUsingHashOnRDN.getPrimaryName() + ", " + 668 splitUsingHashOnAttribute.getPrimaryName() + ", " + 669 splitUsingFewestEntries.getPrimaryName() + ", " + 670 splitUsingFilter.getPrimaryName())); 671 return ResultCode.PARAM_ERROR; 672 } 673 674 675 // Create the LDIF reader. 676 final LDIFReader ldifReader; 677 try 678 { 679 InputStream inputStream; 680 if (sourceLDIF.isPresent()) 681 { 682 final List<File> sourceFiles = sourceLDIF.getValues(); 683 final ArrayList<InputStream> fileInputStreams = 684 new ArrayList<InputStream>(2*sourceFiles.size()); 685 for (final File f : sourceFiles) 686 { 687 if (! fileInputStreams.isEmpty()) 688 { 689 // Go ahead and ensure that there are at least new end-of-line 690 // markers between each file. Otherwise, it's possible for entries 691 // to run together. 692 final byte[] doubleEOL = new byte[StaticUtils.EOL_BYTES.length * 2]; 693 System.arraycopy(StaticUtils.EOL_BYTES, 0, doubleEOL, 0, 694 StaticUtils.EOL_BYTES.length); 695 System.arraycopy(StaticUtils.EOL_BYTES, 0, doubleEOL, 696 StaticUtils.EOL_BYTES.length, StaticUtils.EOL_BYTES.length); 697 fileInputStreams.add(new ByteArrayInputStream(doubleEOL)); 698 } 699 fileInputStreams.add(new FileInputStream(f)); 700 } 701 702 if (fileInputStreams.size() == 1) 703 { 704 inputStream = fileInputStreams.get(0); 705 } 706 else 707 { 708 inputStream = new AggregateInputStream(fileInputStreams); 709 } 710 } 711 else 712 { 713 inputStream = System.in; 714 } 715 716 if (sourceCompressed.isPresent()) 717 { 718 inputStream = new GZIPInputStream(inputStream); 719 } 720 721 ldifReader = new LDIFReader(inputStream, numThreads.getValue(), 722 translator); 723 if (schema != null) 724 { 725 ldifReader.setSchema(schema); 726 } 727 } 728 catch (final Exception e) 729 { 730 Debug.debugException(e); 731 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 732 ERR_SPLIT_LDIF_ERROR_CREATING_LDIF_READER.get( 733 
StaticUtils.getExceptionMessage(e))); 734 return ResultCode.LOCAL_ERROR; 735 } 736 737 738 // Iterate through and process all of the entries. 739 ResultCode resultCode = ResultCode.SUCCESS; 740 final LinkedHashMap<String,OutputStream> outputStreams = 741 new LinkedHashMap<String,OutputStream>(10); 742 try 743 { 744 final AtomicLong entriesRead = new AtomicLong(0L); 745 final AtomicLong entriesExcluded = new AtomicLong(0L); 746 final TreeMap<String,AtomicLong> fileCounts = 747 new TreeMap<String,AtomicLong>(); 748 749readLoop: 750 while (true) 751 { 752 final SplitLDIFEntry entry; 753 try 754 { 755 entry = (SplitLDIFEntry) ldifReader.readEntry(); 756 } 757 catch (final LDIFException le) 758 { 759 Debug.debugException(le); 760 resultCode = ResultCode.LOCAL_ERROR; 761 762 final File f = getOutputFile(SplitLDIFEntry.SET_NAME_ERRORS); 763 OutputStream s = outputStreams.get(SplitLDIFEntry.SET_NAME_ERRORS); 764 if (s == null) 765 { 766 try 767 { 768 s = new FileOutputStream(f); 769 if (compressTarget.isPresent()) 770 { 771 s = new GZIPOutputStream(s); 772 } 773 774 outputStreams.put(SplitLDIFEntry.SET_NAME_ERRORS, s); 775 fileCounts.put(SplitLDIFEntry.SET_NAME_ERRORS, 776 new AtomicLong(0L)); 777 } 778 catch (final Exception e) 779 { 780 Debug.debugException(e); 781 resultCode = ResultCode.LOCAL_ERROR; 782 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 783 ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get( 784 f.getAbsolutePath(), 785 StaticUtils.getExceptionMessage(e))); 786 break readLoop; 787 } 788 } 789 790 final ByteStringBuffer buffer = new ByteStringBuffer(); 791 buffer.append("# "); 792 buffer.append(le.getMessage()); 793 buffer.append(StaticUtils.EOL_BYTES); 794 795 final List<String> dataLines = le.getDataLines(); 796 if (dataLines != null) 797 { 798 for (final String dataLine : dataLines) 799 { 800 buffer.append(dataLine); 801 buffer.append(StaticUtils.EOL_BYTES); 802 } 803 } 804 805 buffer.append(StaticUtils.EOL_BYTES); 806 807 try 808 { 809 s.write(buffer.toByteArray()); 810 } 
811 catch (final Exception e) 812 { 813 Debug.debugException(e); 814 resultCode = ResultCode.LOCAL_ERROR; 815 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 816 ERR_SPLIT_LDIF_ERROR_WRITING_ERROR_TO_FILE.get( 817 le.getMessage(), f.getAbsolutePath(), 818 StaticUtils.getExceptionMessage(e))); 819 break readLoop; 820 } 821 822 if (le.mayContinueReading()) 823 { 824 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 825 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_RECOVERABLE.get( 826 StaticUtils.getExceptionMessage(le))); 827 continue; 828 } 829 else 830 { 831 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 832 ERR_SPLIT_LDIF_INVALID_LDIF_RECORD_UNRECOVERABLE.get( 833 StaticUtils.getExceptionMessage(le))); 834 break; 835 } 836 } 837 catch (final IOException ioe) 838 { 839 Debug.debugException(ioe); 840 resultCode = ResultCode.LOCAL_ERROR; 841 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 842 ERR_SPLIT_LDIF_IO_READ_ERROR.get( 843 StaticUtils.getExceptionMessage(ioe))); 844 break; 845 } 846 catch (final Exception e) 847 { 848 Debug.debugException(e); 849 resultCode = ResultCode.LOCAL_ERROR; 850 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 851 ERR_SPLIT_LDIF_UNEXPECTED_READ_ERROR.get( 852 StaticUtils.getExceptionMessage(e))); 853 break; 854 } 855 856 if (entry == null) 857 { 858 break; 859 } 860 861 final long readCount = entriesRead.incrementAndGet(); 862 if ((readCount % 1000L) == 0) 863 { 864 // Even though we aren't done with this entry yet, we'll go ahead and 865 // log a progress message now because it's easier to do that now than 866 // to ensure that it's handled properly through all possible error 867 // conditions that need to be handled below. 868 wrapOut(0, MAX_OUTPUT_LINE_LENGTH, 869 INFO_SPLIT_LDIF_PROGRESS.get(readCount)); 870 } 871 872 873 // Get the set(s) to which the entry should be written. 
If this is 874 // null (which could be the case as a result of a race condition when 875 // using multiple threads where processing for a child completes before 876 // processing for its parent, or as a result of a case in which a 877 // child is included without or before its parent), then try to see if 878 // we can get the sets by passing the entry through the translator. 879 Set<String> sets = entry.getSets(); 880 byte[] ldifBytes = entry.getLDIFBytes(); 881 if (sets == null) 882 { 883 try 884 { 885 sets = translator.translate(entry, 0L).getSets(); 886 } 887 catch (final Exception e) 888 { 889 Debug.debugException(e); 890 } 891 892 if (sets == null) 893 { 894 final SplitLDIFEntry errorEntry = translator.createEntry(entry, 895 ERR_SPLIT_LDIF_ENTRY_WITHOUT_PARENT.get( 896 entry.getDN(), splitBaseDN.getStringValue()), 897 Collections.singleton(SplitLDIFEntry.SET_NAME_ERRORS)); 898 ldifBytes = errorEntry.getLDIFBytes(); 899 sets = errorEntry.getSets(); 900 } 901 } 902 903 904 // If the entry shouldn't be written into any sets, then we don't need 905 // to do anything else. 906 if (sets.isEmpty()) 907 { 908 entriesExcluded.incrementAndGet(); 909 continue; 910 } 911 912 913 // Write the entry into each of the target sets, creating the output 914 // files if necessary. 915 for (final String set : sets) 916 { 917 if (set.equals(SplitLDIFEntry.SET_NAME_ERRORS)) 918 { 919 // This indicates that an error was encountered during processing, 920 // so we'll update the result code to reflect that. 
921 resultCode = ResultCode.LOCAL_ERROR; 922 } 923 924 final File f = getOutputFile(set); 925 OutputStream s = outputStreams.get(set); 926 if (s == null) 927 { 928 try 929 { 930 s = new FileOutputStream(f); 931 if (compressTarget.isPresent()) 932 { 933 s = new GZIPOutputStream(s); 934 } 935 936 outputStreams.put(set, s); 937 fileCounts.put(set, new AtomicLong(0L)); 938 } 939 catch (final Exception e) 940 { 941 Debug.debugException(e); 942 resultCode = ResultCode.LOCAL_ERROR; 943 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 944 ERR_SPLIT_LDIF_CANNOT_OPEN_OUTPUT_FILE.get( 945 f.getAbsolutePath(), 946 StaticUtils.getExceptionMessage(e))); 947 break readLoop; 948 } 949 } 950 951 try 952 { 953 s.write(ldifBytes); 954 } 955 catch (final Exception e) 956 { 957 Debug.debugException(e); 958 resultCode = ResultCode.LOCAL_ERROR; 959 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 960 ERR_SPLIT_LDIF_ERROR_WRITING_TO_FILE.get( 961 entry.getDN(), f.getAbsolutePath(), 962 StaticUtils.getExceptionMessage(e))); 963 break readLoop; 964 } 965 966 fileCounts.get(set).incrementAndGet(); 967 } 968 } 969 970 971 // Processing is complete. Summarize the processing that was performed. 
972 final long finalReadCount = entriesRead.get(); 973 if (finalReadCount > 1000L) 974 { 975 out(); 976 } 977 978 wrapOut(0, MAX_OUTPUT_LINE_LENGTH, 979 INFO_SPLIT_LDIF_PROCESSING_COMPLETE.get(finalReadCount)); 980 981 final long excludedCount = entriesExcluded.get(); 982 if (excludedCount > 0L) 983 { 984 wrapOut(0, MAX_OUTPUT_LINE_LENGTH, 985 INFO_SPLIT_LDIF_EXCLUDED_COUNT.get(excludedCount)); 986 } 987 988 for (final Map.Entry<String,AtomicLong> e : fileCounts.entrySet()) 989 { 990 final File f = getOutputFile(e.getKey()); 991 wrapOut(0, MAX_OUTPUT_LINE_LENGTH, 992 INFO_SPLIT_LDIF_COUNT_TO_FILE.get(e.getValue().get(), 993 f.getName())); 994 } 995 } 996 finally 997 { 998 try 999 { 1000 ldifReader.close(); 1001 } 1002 catch (final Exception e) 1003 { 1004 Debug.debugException(e); 1005 } 1006 1007 for (final Map.Entry<String,OutputStream> e : outputStreams.entrySet()) 1008 { 1009 try 1010 { 1011 e.getValue().close(); 1012 } 1013 catch (final Exception ex) 1014 { 1015 Debug.debugException(ex); 1016 resultCode = ResultCode.LOCAL_ERROR; 1017 wrapErr(0, MAX_OUTPUT_LINE_LENGTH, 1018 ERR_SPLIT_LDIF_ERROR_CLOSING_FILE.get( 1019 getOutputFile(e.getKey()), 1020 StaticUtils.getExceptionMessage(ex))); 1021 } 1022 } 1023 } 1024 1025 return resultCode; 1026 } 1027 1028 1029 1030 /** 1031 * Retrieves the schema that should be used for processing. 1032 * 1033 * @return The schema that was created. 1034 * 1035 * @throws LDAPException If a problem is encountered while retrieving the 1036 * schema. 1037 */ 1038 private Schema getSchema() 1039 throws LDAPException 1040 { 1041 // If any schema paths were specified, then load the schema only from those 1042 // paths. 
1043 if (schemaPath.isPresent()) 1044 { 1045 final ArrayList<File> schemaFiles = new ArrayList<File>(10); 1046 for (final File path : schemaPath.getValues()) 1047 { 1048 if (path.isFile()) 1049 { 1050 schemaFiles.add(path); 1051 } 1052 else 1053 { 1054 final TreeMap<String,File> fileMap = new TreeMap<String,File>(); 1055 for (final File schemaDirFile : path.listFiles()) 1056 { 1057 final String name = schemaDirFile.getName(); 1058 if (schemaDirFile.isFile() && name.toLowerCase().endsWith(".ldif")) 1059 { 1060 fileMap.put(name, schemaDirFile); 1061 } 1062 } 1063 schemaFiles.addAll(fileMap.values()); 1064 } 1065 } 1066 1067 if (schemaFiles.isEmpty()) 1068 { 1069 throw new LDAPException(ResultCode.PARAM_ERROR, 1070 ERR_SPLIT_LDIF_NO_SCHEMA_FILES.get( 1071 schemaPath.getIdentifierString())); 1072 } 1073 else 1074 { 1075 try 1076 { 1077 return Schema.getSchema(schemaFiles); 1078 } 1079 catch (final Exception e) 1080 { 1081 Debug.debugException(e); 1082 throw new LDAPException(ResultCode.LOCAL_ERROR, 1083 ERR_SPLIT_LDIF_ERROR_LOADING_SCHEMA.get( 1084 StaticUtils.getExceptionMessage(e))); 1085 } 1086 } 1087 } 1088 else 1089 { 1090 // If the INSTANCE_ROOT environment variable is set and it refers to a 1091 // directory that has a config/schema subdirectory that has one or more 1092 // schema files in it, then read the schema from that directory. 
1093 try 1094 { 1095 final String instanceRootStr = System.getenv("INSTANCE_ROOT"); 1096 if (instanceRootStr != null) 1097 { 1098 final File instanceRoot = new File(instanceRootStr); 1099 final File configDir = new File(instanceRoot, "config"); 1100 final File schemaDir = new File(configDir, "schema"); 1101 if (schemaDir.exists()) 1102 { 1103 final TreeMap<String,File> fileMap = new TreeMap<String,File>(); 1104 for (final File schemaDirFile : schemaDir.listFiles()) 1105 { 1106 final String name = schemaDirFile.getName(); 1107 if (schemaDirFile.isFile() && 1108 name.toLowerCase().endsWith(".ldif")) 1109 { 1110 fileMap.put(name, schemaDirFile); 1111 } 1112 } 1113 1114 if (! fileMap.isEmpty()) 1115 { 1116 return Schema.getSchema(new ArrayList<File>(fileMap.values())); 1117 } 1118 } 1119 } 1120 } 1121 catch (final Exception e) 1122 { 1123 Debug.debugException(e); 1124 } 1125 } 1126 1127 1128 // If we've gotten here, then just return null and the tool will try to use 1129 // the default standard schema. 1130 return null; 1131 } 1132 1133 1134 1135 /** 1136 * Retrieves a file object that refers to an output file with the provided 1137 * extension. 1138 * 1139 * @param extension The extension to use for the file. 1140 * 1141 * @return A file object that refers to an output file with the provided 1142 * extension. 
*/
  private File getOutputFile(final String extension)
  {
    // Prefer the explicitly-configured target base path, and fall back to the
    // source LDIF path when none was given.
    final File basePath = targetLDIFBasePath.isPresent()
         ? targetLDIFBasePath.getValue()
         : sourceLDIF.getValue();

    return new File(basePath.getAbsolutePath() + extension);
  }



  /**
   * {@inheritDoc}
   */
  @Override()
  public LinkedHashMap<String[],String> getExampleUsages()
  {
    // Combine the examples from each subcommand, keeping the subcommands in a
    // fixed order and each subcommand's examples in their own order.
    final LinkedHashMap<String[],String> examples =
         new LinkedHashMap<String[],String>(4);

    examples.putAll(splitUsingHashOnRDN.getExampleUsages());
    examples.putAll(splitUsingHashOnAttribute.getExampleUsages());
    examples.putAll(splitUsingFewestEntries.getExampleUsages());
    examples.putAll(splitUsingFilter.getExampleUsages());

    return examples;
  }
}