{
"Type": "AWS::DataPipeline::Pipeline",
"Properties": {
"Name": "DynamoDBInputS3OutputHive",
"Description": "Pipeline to backup DynamoDB data to S3",
"Activate": "true",
"ParameterObjects": [
{
"Id": "myDDBReadThroughputRatio",
"Attributes": [
{
"Key": "description",
"StringValue": "DynamoDB read throughput ratio"
},
{
"Key": "type",
"StringValue": "Double"
},
{
"Key": "default",
"StringValue": "0.2"
}
]
},
{
"Id": "myOutputS3Loc",
"Attributes": [
{
"Key": "description",
"StringValue": "S3 output bucket"
},
{
"Key": "type",
"StringValue": "AWS::S3::ObjectKey"
},
{
"Key": "default",
"StringValue": { "Fn::Join" : [ "", [ "s3://", { "Ref": "S3OutputLoc" } ] ] }
}
]
},
{
"Id": "myDDBTableName",
"Attributes": [
{
"Key": "description",
"StringValue": "DynamoDB Table Name"
},
{
"Key": "type",
"StringValue": "String"
}
]
}
],
"ParameterValues": [
{
"Id": "myDDBTableName",
"StringValue": { "Ref": "TableName" }
}
],
"PipelineObjects": [
{
"Id": "S3BackupLocation",
"Name": "Copy data to this S3 location",
"Fields": [
{
"Key": "type",
"StringValue": "S3DataNode"
},
{
"Key": "dataFormat",
"RefValue": "DDBExportFormat"
},
{
"Key": "directoryPath",
"StringValue": "#{myOutputS3Loc}/#{format(@scheduledStartTime, 'YYYY-MM-dd-HH-mm-ss')}"
}
]
},
{
"Id": "DDBSourceTable",
"Name": "DDBSourceTable",
"Fields": [
{
"Key": "tableName",
"StringValue": "#{myDDBTableName}"
},
{
"Key": "type",
"StringValue": "DynamoDBDataNode"
},
{
"Key": "dataFormat",
"RefValue": "DDBExportFormat"
},
{
"Key": "readThroughputPercent",
"StringValue": "#{myDDBReadThroughputRatio}"
}
]
},
{
"Id": "DDBExportFormat",
"Name": "DDBExportFormat",
"Fields": [
{
"Key": "type",
"StringValue": "DynamoDBExportDataFormat"
}
]
},
{
"Id": "TableBackupActivity",
"Name": "TableBackupActivity",
"Fields": [
{
"Key": "resizeClusterBeforeRunning",
"StringValue": "true"
},
{
"Key": "type",
"StringValue": "HiveCopyActivity"
},
{
"Key": "input",
"RefValue": "DDBSourceTable"
},
{
"Key": "runsOn",
"RefValue": "EmrClusterForBackup"
},
{
"Key": "output",
"RefValue": "S3BackupLocation"
}
]
},
{
"Id": "DefaultSchedule",
"Name": "RunOnce",
"Fields": [
{
"Key": "occurrences",
"StringValue": "1"
},
{
"Key": "startAt",
"StringValue": "FIRST_ACTIVATION_DATE_TIME"
},
{
"Key": "type",
"StringValue": "Schedule"
},
{
"Key": "period",
"StringValue": "1 Day"
}
]
},
{
"Id": "Default",
"Name": "Default",
"Fields": [
{
"Key": "type",
"StringValue": "Default"
},
{
"Key": "scheduleType",
"StringValue": "cron"
},
{
"Key": "failureAndRerunMode",
"StringValue": "CASCADE"
},
{
"Key": "role",
"StringValue": "DataPipelineDefaultRole"
},
{
"Key": "resourceRole",
"StringValue": "DataPipelineDefaultResourceRole"
},
{
"Key": "schedule",
"RefValue": "DefaultSchedule"
}
]
},
{
"Id": "EmrClusterForBackup",
"Name": "EmrClusterForBackup",
"Fields": [
{
"Key": "terminateAfter",
"StringValue": "2 Hours"
},
{
"Key": "amiVersion",
"StringValue": "3.3.2"
},
{
"Key": "masterInstanceType",
"StringValue": "m1.medium"
},
{
"Key": "coreInstanceType",
"StringValue": "m1.medium"
},
{
"Key": "coreInstanceCount",
"StringValue": "1"
},
{
"Key": "type",
"StringValue": "EmrCluster"
}
]
}
]
}
}