public class HDFSRepositoryConnector extends BaseRepositoryConnector
| Modifier and Type | Class and Description |
|---|---|
protected static class |
HDFSRepositoryConnector.BackgroundStreamThread |
protected static class |
HDFSRepositoryConnector.CheckConnectionThread |
protected class |
HDFSRepositoryConnector.GetChildrenThread |
protected static class |
HDFSRepositoryConnector.GetObjectThread |
protected static class |
HDFSRepositoryConnector.GetSessionThread |
| Modifier and Type | Field and Description |
|---|---|
static String |
_rcsid |
protected static String[] |
activitiesList |
protected static String |
ACTIVITY_READ |
protected long |
lastSessionFetch |
protected String |
nameNodeHost |
protected String |
nameNodePort |
protected String |
nameNodeProtocol |
protected static String |
RELATIONSHIP_CHILD |
protected HDFSSession |
session |
protected static long |
timeToRelease |
protected String |
user |
currentContext, params
GLOBAL_DENY_TOKEN, JOBMODE_CONTINUOUS, JOBMODE_ONCEONLY, MODEL_ADD, MODEL_ADD_CHANGE, MODEL_ADD_CHANGE_DELETE, MODEL_ALL, MODEL_CHAINED_ADD, MODEL_CHAINED_ADD_CHANGE, MODEL_CHAINED_ADD_CHANGE_DELETE, MODEL_PARTIAL
| Constructor and Description |
|---|
HDFSRepositoryConnector() |
| Modifier and Type | Method and Description |
|---|---|
String |
addSeedDocuments(ISeedingActivity activities,
Specification spec,
String lastSeedVersion,
long seedTime,
int jobMode)
Queue "seed" documents.
|
String |
check()
Test the connection.
|
protected void |
checkConnection() |
protected static boolean |
checkInclude(String nameNode,
org.apache.hadoop.fs.FileStatus fileStatus,
String fileName,
Specification documentSpecification)
Check if a file or directory should be included, given a document specification.
|
protected static boolean |
checkIngest(String nameNode,
org.apache.hadoop.fs.FileStatus fileStatus,
Specification documentSpecification)
Check if a file should be ingested, given a document specification.
|
protected static boolean |
checkMatch(String sourceMatch,
int sourceIndex,
String match)
Check a match between two strings with wildcards.
|
protected void |
closeSession() |
void |
connect(ConfigParams configParams) |
protected static String |
convertToWGETURI(String path)
Convert a path to an HDFS wget URI.
|
void |
disconnect() |
protected static String |
findConvertPath(String nameNode,
Specification spec,
org.apache.hadoop.fs.Path theFile)
This method finds the part of the path that should be converted to a URI.
|
String[] |
getActivitiesList()
List the activities we might report on.
|
String[] |
getBinNames(String documentIdentifier)
For any given document, list the bins that it is a member of.
|
protected org.apache.hadoop.fs.FileStatus[] |
getChildren(org.apache.hadoop.fs.Path path) |
int |
getConnectorModel()
Tell the world what model this connector uses for getDocumentIdentifiers().
|
String |
getFormCheckJavascriptMethodName(int connectionSequenceNumber)
Obtain the name of the form check javascript method to call.
|
String |
getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
Obtain the name of the form presave check javascript method to call.
|
int |
getMaxDocumentRequest()
Get the maximum number of documents to amalgamate together into one
batch, for this connector.
|
protected org.apache.hadoop.fs.FileStatus |
getObject(org.apache.hadoop.fs.Path path) |
String[] |
getRelationshipTypes()
Return the list of relationship types that this connector recognizes.
|
protected HDFSSession |
getSession()
Set up a session.
|
boolean |
isConnected()
This method is called to assess whether this connector instance should
actually be counted as being connected.
|
protected static String |
mapExtensionToMimeType(String fileName)
Map an extension to a mime type.
|
protected static int |
matchSubPath(String subPath,
String fullPath)
Match a sub-path.
|
void |
outputConfigurationBody(IThreadContext threadContext,
IHTTPOutput out,
Locale locale,
ConfigParams parameters,
String tabName)
Output the configuration body section.
|
void |
outputConfigurationHeader(IThreadContext threadContext,
IHTTPOutput out,
Locale locale,
ConfigParams parameters,
List<String> tabsArray)
Output the configuration header section.
|
void |
outputSpecificationBody(IHTTPOutput out,
Locale locale,
Specification ds,
int connectionSequenceNumber,
int actualSequenceNumber,
String tabName)
Output the specification body section.
|
void |
outputSpecificationHeader(IHTTPOutput out,
Locale locale,
Specification ds,
int connectionSequenceNumber,
List<String> tabsArray)
Output the specification header section.
|
void |
poll() |
protected static boolean |
processCheck(boolean caseSensitive,
String sourceMatch,
int sourceIndex,
String match,
int matchIndex)
Recursive worker method for checkMatch.
|
String |
processConfigurationPost(IThreadContext threadContext,
IPostParameters variableContext,
ConfigParams parameters)
Process a configuration post.
|
void |
processDocuments(String[] documentIdentifiers,
IExistingVersions statuses,
Specification spec,
IProcessActivity activities,
int jobMode,
boolean usesDefaultAuthority)
Process a set of documents.
|
String |
processSpecificationPost(IPostParameters variableContext,
Locale locale,
Specification ds,
int connectionSequenceNumber)
Process a specification post.
|
void |
viewConfiguration(IThreadContext threadContext,
IHTTPOutput out,
Locale locale,
ConfigParams parameters)
View configuration.
|
void |
viewSpecification(IHTTPOutput out,
Locale locale,
Specification ds,
int connectionSequenceNumber)
View specification.
|
addSeedDocuments, addSeedDocuments, addSeedDocuments, getDocumentIdentifiers, getDocumentIdentifiers, getDocumentVersions, getDocumentVersions, getDocumentVersions, getDocumentVersions, getDocumentVersions, getDocumentVersions, getDocumentVersions, getRemainingDocumentIdentifiers, outputSpecificationBody, outputSpecificationBody, outputSpecificationHeader, outputSpecificationHeader, outputSpecificationHeader, processDocuments, processDocuments, processDocuments, processDocuments, processSpecificationPost, processSpecificationPost, releaseDocumentVersions, releaseDocumentVersions, requestInfo, viewSpecification, viewSpecification
clearThreadContext, deinstall, getConfiguration, install, outputConfigurationBody, outputConfigurationHeader, outputConfigurationHeader, pack, packFixedList, packList, packList, processConfigurationPost, setThreadContext, unpack, unpackFixedList, unpackList, viewConfiguration
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
clearThreadContext, deinstall, getConfiguration, install, processConfigurationPost, setThreadContext
public static final String _rcsid
protected static final String ACTIVITY_READ
protected static final String RELATIONSHIP_CHILD
protected static final String[] activitiesList
protected String nameNodeProtocol
protected String nameNodeHost
protected String nameNodePort
protected String user
protected HDFSSession session
protected long lastSessionFetch
protected static final long timeToRelease
public int getConnectorModel()
getConnectorModel in interface IRepositoryConnectorgetConnectorModel in class BaseRepositoryConnectorpublic String[] getRelationshipTypes()
getRelationshipTypes in interface IRepositoryConnectorgetRelationshipTypes in class BaseRepositoryConnectorpublic String[] getActivitiesList()
getActivitiesList in interface IRepositoryConnectorgetActivitiesList in class BaseRepositoryConnectorpublic String[] getBinNames(String documentIdentifier)
getBinNames in interface IRepositoryConnectorgetBinNames in class BaseRepositoryConnectorpublic int getMaxDocumentRequest()
getMaxDocumentRequest in interface IRepositoryConnectorgetMaxDocumentRequest in class BaseRepositoryConnectorpublic void connect(ConfigParams configParams)
connect in interface IConnectorconnect in class BaseConnectorpublic void disconnect()
throws ManifoldCFException
disconnect in interface IConnectordisconnect in class BaseConnectorManifoldCFExceptionprotected HDFSSession getSession() throws ManifoldCFException, ServiceInterruption
public String check() throws ManifoldCFException
check in interface IConnectorcheck in class BaseConnectorManifoldCFExceptionpublic boolean isConnected()
isConnected in interface IConnectorisConnected in class BaseConnectorpublic void poll()
throws ManifoldCFException
poll in interface IConnectorpoll in class BaseConnectorManifoldCFExceptionprotected void closeSession()
throws ManifoldCFException
ManifoldCFExceptionpublic String addSeedDocuments(ISeedingActivity activities, Specification spec, String lastSeedVersion, long seedTime, int jobMode) throws ManifoldCFException, ServiceInterruption
addSeedDocuments in interface IRepositoryConnector
addSeedDocuments in class BaseRepositoryConnector
activities - is the interface this method should use to perform whatever framework actions are desired.
spec - is a document specification (that comes from the job).
seedTime - is the end of the time range of documents to consider, exclusive.
lastSeedVersion - is the last seeding version string for this job, or null if the job has no previous seeding version string.
jobMode - is an integer describing how the job is being run, whether continuous or once-only.
ManifoldCFException
ServiceInterruption
public void processDocuments(String[] documentIdentifiers, IExistingVersions statuses, Specification spec, IProcessActivity activities, int jobMode, boolean usesDefaultAuthority) throws ManifoldCFException, ServiceInterruption
processDocuments in interface IRepositoryConnectorprocessDocuments in class BaseRepositoryConnectordocumentIdentifiers - is the set of document identifiers to process.statuses - are the currently-stored document versions for each document in the set of document identifiers
passed in above.activities - is the interface this method should use to queue up new document references
and ingest documents.jobMode - is an integer describing how the job is being run, whether continuous or once-only.usesDefaultAuthority - will be true only if the authority in use for these documents is the default one.ManifoldCFExceptionServiceInterruptionpublic void outputConfigurationHeader(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, List<String> tabsArray) throws ManifoldCFException, IOException
outputConfigurationHeader in interface IConnectoroutputConfigurationHeader in class BaseConnectorthreadContext - is the local thread context.out - is the output to which any HTML should be sent.parameters - are the configuration parameters, as they currently exist, for this connection being configured.tabsArray - is an array of tab names. Add to this array any tab names that are specific to the connector.ManifoldCFExceptionIOExceptionpublic void outputConfigurationBody(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters, String tabName) throws ManifoldCFException, IOException
outputConfigurationBody in interface IConnectoroutputConfigurationBody in class BaseConnectorthreadContext - is the local thread context.out - is the output to which any HTML should be sent.parameters - are the configuration parameters, as they currently exist, for this connection being configured.tabName - is the current tab name.ManifoldCFExceptionIOExceptionpublic String processConfigurationPost(IThreadContext threadContext, IPostParameters variableContext, ConfigParams parameters) throws ManifoldCFException
processConfigurationPost in class BaseConnectorthreadContext - is the local thread context.variableContext - is the set of variables available from the post, including binary file post information.parameters - are the configuration parameters, as they currently exist, for this connection being configured.ManifoldCFExceptionpublic void viewConfiguration(IThreadContext threadContext, IHTTPOutput out, Locale locale, ConfigParams parameters) throws ManifoldCFException, IOException
viewConfiguration in interface IConnectorviewConfiguration in class BaseConnectorthreadContext - is the local thread context.out - is the output to which any HTML should be sent.parameters - are the configuration parameters, as they currently exist, for this connection being configured.ManifoldCFExceptionIOExceptionpublic String getFormCheckJavascriptMethodName(int connectionSequenceNumber)
getFormCheckJavascriptMethodName in interface IRepositoryConnectorgetFormCheckJavascriptMethodName in class BaseRepositoryConnectorconnectionSequenceNumber - is the unique number of this connection within the job.public String getFormPresaveCheckJavascriptMethodName(int connectionSequenceNumber)
getFormPresaveCheckJavascriptMethodName in interface IRepositoryConnectorgetFormPresaveCheckJavascriptMethodName in class BaseRepositoryConnectorconnectionSequenceNumber - is the unique number of this connection within the job.public void outputSpecificationHeader(IHTTPOutput out, Locale locale, Specification ds, int connectionSequenceNumber, List<String> tabsArray) throws ManifoldCFException, IOException
outputSpecificationHeader in interface IRepositoryConnectoroutputSpecificationHeader in class BaseRepositoryConnectorout - is the output to which any HTML should be sent.locale - is the locale the output is preferred to be in.ds - is the current document specification for this job.connectionSequenceNumber - is the unique number of this connection within the job.tabsArray - is an array of tab names. Add to this array any tab names that are specific to the connector.ManifoldCFExceptionIOExceptionpublic void outputSpecificationBody(IHTTPOutput out, Locale locale, Specification ds, int connectionSequenceNumber, int actualSequenceNumber, String tabName) throws ManifoldCFException, IOException
outputSpecificationBody in interface IRepositoryConnectoroutputSpecificationBody in class BaseRepositoryConnectorout - is the output to which any HTML should be sent.locale - is the locale the output is preferred to be in.ds - is the current document specification for this job.connectionSequenceNumber - is the unique number of this connection within the job.actualSequenceNumber - is the connection within the job that has currently been selected.tabName - is the current tab name. (actualSequenceNumber, tabName) form a unique tuple within
the job.ManifoldCFExceptionIOExceptionpublic String processSpecificationPost(IPostParameters variableContext, Locale locale, Specification ds, int connectionSequenceNumber) throws ManifoldCFException
processSpecificationPost in interface IRepositoryConnectorprocessSpecificationPost in class BaseRepositoryConnectorvariableContext - contains the post data, including binary file-upload information.locale - is the locale the output is preferred to be in.ds - is the current document specification for this job.connectionSequenceNumber - is the unique number of this connection within the job.ManifoldCFExceptionpublic void viewSpecification(IHTTPOutput out, Locale locale, Specification ds, int connectionSequenceNumber) throws ManifoldCFException, IOException
viewSpecification in interface IRepositoryConnectorviewSpecification in class BaseRepositoryConnectorout - is the output to which any HTML should be sent.locale - is the locale the output is preferred to be in.ds - is the current document specification for this job.connectionSequenceNumber - is the unique number of this connection within the job.ManifoldCFExceptionIOExceptionprotected static String convertToWGETURI(String path) throws ManifoldCFException
filePath - is the document filePath.repositoryPath - is the document repositoryPath.ManifoldCFExceptionprotected static String findConvertPath(String nameNode, Specification spec, org.apache.hadoop.fs.Path theFile)
spec - is the document specification.documentIdentifier - is the document identifier.protected static String mapExtensionToMimeType(String fileName)
protected static boolean checkInclude(String nameNode, org.apache.hadoop.fs.FileStatus fileStatus, String fileName, Specification documentSpecification) throws ManifoldCFException
fileName - is the canonical file name.documentSpecification - is the specification.ManifoldCFExceptionprotected static boolean checkIngest(String nameNode, org.apache.hadoop.fs.FileStatus fileStatus, Specification documentSpecification) throws ManifoldCFException
file - is the file.documentSpecification - is the specification.ManifoldCFExceptionprotected static int matchSubPath(String subPath, String fullPath)
subPath - is the sub path.fullPath - is the full path.protected static boolean checkMatch(String sourceMatch, int sourceIndex, String match)
sourceMatch - is the expanded string (no wildcards)sourceIndex - is the starting point in the expanded string.match - is the wildcard-based string.protected static boolean processCheck(boolean caseSensitive,
String sourceMatch,
int sourceIndex,
String match,
int matchIndex)
caseSensitive - is true if file names are case sensitive.sourceMatch - is the source string (w/o wildcards)sourceIndex - is the current point in the source string.match - is the match string (w/wildcards)matchIndex - is the current point in the match string.protected void checkConnection()
throws ManifoldCFException,
ServiceInterruption
protected org.apache.hadoop.fs.FileStatus[] getChildren(org.apache.hadoop.fs.Path path)
throws ManifoldCFException,
ServiceInterruption
protected org.apache.hadoop.fs.FileStatus getObject(org.apache.hadoop.fs.Path path)
throws ManifoldCFException,
ServiceInterruption