public class ExternalSort extends Object
| Modifier and Type | Field and Description |
|---|---|
static Comparator<String> |
defaultcomparator |
| Constructor and Description |
|---|
ExternalSort() |
| Modifier and Type | Method and Description |
|---|---|
static void |
displayUsage() |
static long |
estimateBestSizeOfBlocks(File filetobesorted,
int maxtmpfiles) |
static void |
main(String[] args) |
static int |
merge(BufferedWriter fbw,
Comparator<String> cmp,
boolean distinct,
List<org.apache.jackrabbit.oak.commons.sort.BinaryFileBuffer> buffers)
This merges several BinaryFileBuffer to an output writer.
|
static int |
mergeSortedFiles(List<File> files,
File outputfile)
This merges a bunch of temporary flat files
|
static int |
mergeSortedFiles(List<File> files,
File outputfile,
Comparator<String> cmp)
This merges a bunch of temporary flat files
|
static int |
mergeSortedFiles(List<File> files,
File outputfile,
Comparator<String> cmp,
boolean distinct)
This merges a bunch of temporary flat files
|
static int |
mergeSortedFiles(List<File> files,
File outputfile,
Comparator<String> cmp,
Charset cs)
This merges a bunch of temporary flat files
|
static int |
mergeSortedFiles(List<File> files,
File outputfile,
Comparator<String> cmp,
Charset cs,
boolean distinct)
This merges a bunch of temporary flat files
|
static int |
mergeSortedFiles(List<File> files,
File outputfile,
Comparator<String> cmp,
Charset cs,
boolean distinct,
boolean append,
boolean usegzip)
This merges a bunch of temporary flat files
|
static void |
sort(File input,
File output) |
static File |
sortAndSave(List<String> tmplist,
Comparator<String> cmp,
Charset cs,
File tmpdirectory)
Sort a list and save it to a temporary file
|
static File |
sortAndSave(List<String> tmplist,
Comparator<String> cmp,
Charset cs,
File tmpdirectory,
boolean distinct,
boolean usegzip)
Sort a list and save it to a temporary file
|
static List<File> |
sortInBatch(File file)
This will simply load the file by blocks of lines, then sort them in-memory, and write the
result to temporary files that have to be merged later.
|
static List<File> |
sortInBatch(File file,
Comparator<String> cmp)
This will simply load the file by blocks of lines, then sort them in-memory, and write the
result to temporary files that have to be merged later.
|
static List<File> |
sortInBatch(File file,
Comparator<String> cmp,
boolean distinct)
This will simply load the file by blocks of lines, then sort them in-memory, and write the
result to temporary files that have to be merged later.
|
static List<File> |
sortInBatch(File file,
Comparator<String> cmp,
int maxtmpfiles,
Charset cs,
File tmpdirectory,
boolean distinct)
This will simply load the file by blocks of lines, then sort them in-memory, and write the
result to temporary files that have to be merged later.
|
static List<File> |
sortInBatch(File file,
Comparator<String> cmp,
int maxtmpfiles,
Charset cs,
File tmpdirectory,
boolean distinct,
int numHeader,
boolean usegzip)
This will simply load the file by blocks of lines, then sort them in-memory, and write the
result to temporary files that have to be merged later.
|
public static Comparator<String> defaultcomparator
public static void sort(File input, File output) throws IOException
Throws: IOException

public static long estimateBestSizeOfBlocks(File filetobesorted, int maxtmpfiles)
public static List<File> sortInBatch(File file) throws IOException
file - some flat file
cmp - string comparator
Throws: IOException

public static List<File> sortInBatch(File file, Comparator<String> cmp) throws IOException
file - some flat file
cmp - string comparator
Throws: IOException

public static List<File> sortInBatch(File file, Comparator<String> cmp, boolean distinct) throws IOException
file - some flat file
cmp - string comparator
distinct - Pass true if duplicate lines should be discarded.
Throws: IOException

public static List<File> sortInBatch(File file, Comparator<String> cmp, int maxtmpfiles, Charset cs, File tmpdirectory, boolean distinct, int numHeader, boolean usegzip) throws IOException
file - some flat file
cmp - string comparator
maxtmpfiles - maximal number of temporary files
cs - character set to use (can use Charset.defaultCharset())
tmpdirectory - location of the temporary files (set to null for default location)
distinct - Pass true if duplicate lines should be discarded.
numHeader - number of lines to skip before sorting starts
Throws: IOException

public static List<File> sortInBatch(File file, Comparator<String> cmp, int maxtmpfiles, Charset cs, File tmpdirectory, boolean distinct) throws IOException
file - some flat file
cmp - string comparator
maxtmpfiles - maximal number of temporary files
cs - character set to use (can use Charset.defaultCharset())
tmpdirectory - location of the temporary files (set to null for default location)
distinct - Pass true if duplicate lines should be discarded.
Throws: IOException

public static File sortAndSave(List<String> tmplist, Comparator<String> cmp, Charset cs, File tmpdirectory, boolean distinct, boolean usegzip) throws IOException
tmplist - data to be sorted
cmp - string comparator
cs - charset to use for output (can use Charset.defaultCharset())
tmpdirectory - location of the temporary files (set to null for default location)
distinct - Pass true if duplicate lines should be discarded.
Throws: IOException

public static File sortAndSave(List<String> tmplist, Comparator<String> cmp, Charset cs, File tmpdirectory) throws IOException
tmplist - data to be sorted
cmp - string comparator
cs - charset to use for output (can use Charset.defaultCharset())
tmpdirectory - location of the temporary files (set to null for default location)
Throws: IOException

public static int mergeSortedFiles(List<File> files, File outputfile) throws IOException
files - the sorted files to be merged
outputfile - the output file
Throws: IOException

public static int mergeSortedFiles(List<File> files, File outputfile, Comparator<String> cmp) throws IOException
files - the sorted files to be merged
outputfile - the output file
Throws: IOException

public static int mergeSortedFiles(List<File> files, File outputfile, Comparator<String> cmp, boolean distinct) throws IOException
files - the sorted files to be merged
outputfile - the output file
Throws: IOException

public static int mergeSortedFiles(List<File> files, File outputfile, Comparator<String> cmp, Charset cs, boolean distinct, boolean append, boolean usegzip) throws IOException
files - The List of sorted Files to be merged.
Charset - character set to use to load the strings
distinct - Pass true if duplicate lines should be discarded. (elchetz@gmail.com)
outputfile - The output File to merge the results to.
cmp - The Comparator to use to compare Strings.
cs - The Charset to be used for the byte to character conversion.
append - Pass true if the result should be appended to the File instead of overwriting it. Defaults to false in the overloaded methods.
usegzip - assumes we used gzip compression for temporary files
Throws: IOException

public static int merge(BufferedWriter fbw, Comparator<String> cmp, boolean distinct, List<org.apache.jackrabbit.oak.commons.sort.BinaryFileBuffer> buffers) throws IOException
fbw - A buffer where we write the data.
cmp - A comparator object that tells us how to sort the lines.
distinct - Pass true if duplicate lines should be discarded. (elchetz@gmail.com)
buffers - Where the data should be read.
Throws: IOException

public static int mergeSortedFiles(List<File> files, File outputfile, Comparator<String> cmp, Charset cs, boolean distinct) throws IOException
files - The List of sorted Files to be merged.
Charset - character set to use to load the strings
distinct - Pass true if duplicate lines should be discarded. (elchetz@gmail.com)
outputfile - The output File to merge the results to.
cmp - The Comparator to use to compare Strings.
cs - The Charset to be used for the byte to character conversion.
Throws: IOException

public static int mergeSortedFiles(List<File> files, File outputfile, Comparator<String> cmp, Charset cs) throws IOException
files - the sorted files to be merged
outputfile - the output file
cs - character set to use to load the strings
Throws: IOException

public static void displayUsage()
public static void main(String[] args) throws IOException
Throws: IOException

Copyright © 2012-2014 The Apache Software Foundation. All Rights Reserved.