This fetch script is used to copy the crawl data to the appropriate directories for all baseline update operations, including those performed with a delta update pipeline. The script is included in this section, with numbered steps indicating the actions performed at each point in the script.
Note that the script does not actually perform the baseline update itself; that update operation is managed by scripts in the AppConfig.xml document.
<script id="fetchFullCasCrawlData">
<![CDATA[
// Copies full and incremental CAS crawl output into Forge's incoming data
// directory ("full" and "incremental" subdirectories), then either creates
// a gzipped placeholder file (when no incremental files were copied) or
// renames the incremental files with a zero-padded numeric prefix.
// All of that work happens only while "complete_cas_crawl_data_lock" is held.
log.info("Fetching full CAS crawl data for processing.");

// try to acquire a lock on the complete crawl data directory
// for up to 10 minutes
// The braces matter: without them only the removeFlag call is guarded and
// the copy/rename/releaseLock steps would run even when the lock was not
// obtained.
if (LockManager.acquireLockBlocking("complete_cas_crawl_data_lock", 600)) {
  // remove baseline data ready flag, ensuring baseline doesn't start
  // before data is completely copied and ready for processing
  LockManager.removeFlag("baseline_data_ready");

  // source locations of the CAS crawl output (full and incremental)
  fullSrcDir = PathUtils.getAbsolutePath(CAS.getWorkingDir(),
    CAS.getCasCrawlFullOutputDestDir()) + "/\\*";
  incrSrcDir = PathUtils.getAbsolutePath(CAS.getWorkingDir(),
    CAS.getCasCrawlIncrementalOutputDestDir()) + "/\\*";

  // destination locations under Forge's incoming data directory
  fullDestDir = PathUtils.getAbsolutePath(Forge.getWorkingDir(),
    Forge.getIncomingDataDir()) + "/full";
  incrDestDir = PathUtils.getAbsolutePath(Forge.getWorkingDir(),
    Forge.getIncomingDataDir()) + "/incremental";

  // create destination directories (one utility, re-initialized per dir)
  mkDirUtil = new CreateDirUtility(Forge.getAppName(),
    Forge.getEacHost(), Forge.getEacPort(), Forge.isSslEnabled());
  mkDirUtil.init(Forge.getHostId(), fullDestDir, Forge.getWorkingDir());
  mkDirUtil.run();
  mkDirUtil.init(Forge.getHostId(), incrDestDir, Forge.getWorkingDir());
  mkDirUtil.run();

  crawlDataCopy = new CopyUtility(Forge.getAppName(),
    Forge.getEacHost(), Forge.getEacPort(), Forge.isSslEnabled());

  // copy full crawl data
  crawlDataCopy.init("copy_complete_cas_full_crawl_data",
    CAS.getCasCrawlOutputDestHost(), Forge.getHostId(), fullSrcDir,
    fullDestDir, true);
  crawlDataCopy.run();

  // copy incremental crawl data
  crawlDataCopy.init("copy_complete_cas_incremental_crawl_data",
    CAS.getCasCrawlOutputDestHost(), Forge.getHostId(), incrSrcDir,
    incrDestDir, true);
  crawlDataCopy.run();

  // test for existing incremental files, since the dummy file is only
  // needed when there are no real incremental files
  // NOTE(review): fileUtil is not defined anywhere in the visible part of
  // this script; confirm it is in scope before this point.
  if (! fileUtil.dirContainsFiles(incrDestDir, Forge.getHostId())) {
    // create a variable for the dummy file name and location
    placeholder = incrDestDir + "/placeholder.bin";

    // create Unix touch and gzip commands
    touchCmd = "touch " + placeholder;
    zipCmd = "gzip " + placeholder;

    // for Windows platforms, rewrite the commands using Win commands
    if (System.getProperty("os.name").startsWith("Win")) {
      touchCmd = "%ENDECA_ROOT%\\utilities\\touch.exe " + placeholder;
      zipCmd = "%ENDECA_ROOT%\\utilities\\gzip.exe " + placeholder;
    }

    // use a ShellUtility to touch (i.e. create) the dummy file
    shell = new ShellUtility(Forge.getAppName(), Forge.getEacHost(),
      Forge.getEacPort(), Forge.isSslEnabled());
    shell.init("create_incremental_cas_crawl_placeholder",
      Forge.getHostId(), touchCmd, Forge.getWorkingDir());
    shell.run();

    // use the same ShellUtility to produce a .bin.gz compressed file
    shell.init("zip_incremental_cas_crawl_placeholder",
      Forge.getHostId(), zipCmd, Forge.getWorkingDir());
    shell.run();
  } // end of if clause
  // incremental files do exist, so rename them
  else {
    // get the number of files, to be used to generate the prefix
    incrFiles = fileUtil.getDirContents(incrDestDir, Forge.getHostId());
    fileNum = incrFiles.size();

    // import Java classes we will use for the renaming
    import java.text.NumberFormat;
    import java.text.DecimalFormat;
    import java.util.SortedMap;
    import java.util.TreeMap;
    import java.io.File;

    // instantiate a NumberFormat to format the prefix name
    // (six digits, zero-padded)
    NumberFormat formatter = new DecimalFormat("000000");

    // instantiate a SortedMap and add the file names,
    // which will be in an ascending key order
    SortedMap sortedFiles = new TreeMap();
    sortedFiles.putAll(incrFiles);

    // directory the renamed files are written to; it does not change
    // between iterations, so compute it once instead of per file
    renameDestDir = PathUtils.getAbsolutePath(Forge.getWorkingDir(),
      Forge.getIncomingDataDir()) + File.separator + "incremental";

    // loop through the sorted treemap; the first (lowest) key receives the
    // highest prefix and the prefix counts down from there
    for (incrFile : sortedFiles.keySet()) {
      // generate a filename prefix, based on the number of files left
      prefix = formatter.format(fileNum);

      // get the original filename and prepend the generated prefix
      origFileName = PathUtils.getFileNameFromPath(incrFile);
      newFileName = prefix + "_" + origFileName;

      // generate the pathname to which we will rename the file
      absNewFile = renameDestDir + File.separator + newFileName;

      // use the LocalMoveUtility to rename the file
      renameUtil = new LocalMoveUtility(Forge.getAppName(),
        Forge.getEacHost(), Forge.getEacPort(), Forge.isSslEnabled());
      renameUtil.init(Forge.getHostId(), incrFile, absNewFile,
        Forge.getWorkingDir());
      renameUtil.run();

      // decrease the fileNum variable by one so that the name of the
      // next file will be numerically more recent
      fileNum--;
    } // end of for loop
  } // end of else clause

  // (re)set flag indicating that the baseline can process incoming data
  LockManager.setFlag("baseline_data_ready");

  // release lock on the crawl data directory
  // NOTE(review): if any step above throws, this release is skipped and
  // the lock stays held; confirm that is the intended failure behavior.
  LockManager.releaseLock("complete_cas_crawl_data_lock");
}
else {
  // the lock was not acquired within the timeout: touch nothing
  log.warning("Failed to obtain lock.");
}
...
log.info("Crawl data fetch script finished.");