From 8a28eef5da5f35ecbabf45ed97bb6d8ecb637cec Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Thu, 14 Jul 2016 17:08:48 -0400 Subject: [PATCH 01/12] Refactor to use a json configuration file and/or prompt for information --- README.md | 16 ++++++++-- config.example.json | 4 +++ index.js | 17 +++++----- lib/index.js | 77 +++++++++++++++++++++++++++++++-------------- package.json | 1 + 5 files changed, 82 insertions(+), 33 deletions(-) create mode 100644 config.example.json diff --git a/README.md b/README.md index 47f54ca..51bfde8 100644 --- a/README.md +++ b/README.md @@ -10,11 +10,21 @@ This utility won't save the files anywhere else. You can save the results to a f ## usage ``` -$ node index.js MY_APP_ID MY_MASTER_KEY +$ node index.js ``` -you can optionally specify a server URL +you can optionally specify a json configuration file ``` -$ node index.js MY_APP_ID MY_MASTER_KEY MY_SERVER_URL +$ node index.js ./config.json +``` + +Example `config.json`: + +``` +{ + "applicationId": "PVpAyhBmNuouwPBNksRLPVpAyhBmNuouwPBNksRL", + "masterKey": "DQzeY9lelKLPeWQH6zhsNUqnrudyOU07hjC6g53a", + "serverURL": "http://parse-server.yourdomain.com/parse" +} ``` \ No newline at end of file diff --git a/config.example.json b/config.example.json new file mode 100644 index 0000000..51e237d --- /dev/null +++ b/config.example.json @@ -0,0 +1,4 @@ +{ + "applicationId": "61U6Vj3VbzAgotSaSiKJ2Jr4RDIYUtcCCoTbQaJR", + "masterKey": "UeHG7Bpd7OqSwpU5eAeA5TCXeHVPpPCrGLXWqeW2" +} \ No newline at end of file diff --git a/index.js b/index.js index 43c72ec..afc725b 100644 --- a/index.js +++ b/index.js @@ -1,10 +1,13 @@ -var appID = process.argv[2]; -var masterKey = process.argv[3]; -var serverURL = process.argv[4]; +var configFilePath = process.argv[2]; +var config = {}; -if (!appID || !masterKey) { - process.stderr.write('An appId and a masterKey are required\n'); - process.exit(1); +if (configFilePath) { + try { + config = require(configFilePath); + } catch(e) { + console.log('Cannot load '+configFilePath); + process.exit(1); + } } -var utils = require('./lib')(appID, masterKey, serverURL); +var utils = require('./lib')(config); diff --git a/lib/index.js b/lib/index.js index 132a3a3..a97a860 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,6 +1,59 @@ +var inquirer = require('inquirer'); var Parse = require('parse/node'); var schemas = require('./schemas'); +module.exports = initialize; + +function initialize(config) { + var questions = [ + { + type: 'input', + name: 'applicationId', + message: 'The applicationId', + when: !config.applicationId + }, { + type: 'input', + name: 'masterKey', + message: 'The masterKey', + when: !config.masterKey + }, + { + type: 'input', + name: 'serverURL', + message: 'The serverURL', + when: !config.serverURL, + default: 'https://api.parse.com/1' + } + ]; + + inquirer.prompt(questions).then(function (answers) { + config = Object.assign(config, answers); + Parse.initialize(config.applicationId, null, config.masterKey); + Parse.serverURL = config.serverURL; + printAllFiles(); + }); +} + +function printAllFiles() { + schemas.get().then(function(res){ + var schemasWithFiles = onlyFiles(res); + return Promise.all(schemasWithFiles.map(getFilesFromSchema)); + }).then(function(results) { + var files = results.reduce(function(c, r) { + return c.concat(r); + }, []); + files.forEach(function(file) { + process.stdout.write(file); + process.stdout.write("\n"); + }); + process.exit(0); + }).catch(function(err){ + process.stderr.write(err); + process.stderr.write("\n"); + process.exit(1); + }); +} + 
function onlyFiles(schemas) { return schemas.map(function(schema) { var fileFields = Object.keys(schema.fields).filter(function(key){ @@ -45,26 +98,4 @@ function getFilesFromSchema(schema) { })) }, []); }); -} - -module.exports = function(applicationId, masterKey, serverURL) { - Parse.initialize(applicationId, null, masterKey); - Parse.serverURL = serverURL || "https://api.parse.com/1"; - schemas.get().then(function(res){ - var schemasWithFiles = onlyFiles(res); - return Promise.all(schemasWithFiles.map(getFilesFromSchema)); - }).then(function(results) { - var files = results.reduce(function(c, r) { - return c.concat(r); - }, []); - files.forEach(function(file) { - process.stdout.write(file); - process.stdout.write("\n"); - }); - process.exit(0); - }).catch(function(err){ - process.stderr.write(err); - process.stderr.write("\n"); - process.exit(1); - }) -} +} \ No newline at end of file diff --git a/package.json b/package.json index bf6c969..613f117 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ }, "homepage": "https://github.com/parse-server-modules/parse-files-utils#readme", "dependencies": { + "inquirer": "^1.1.2", "parse": "^1.8.5", "request": "^2.72.0" } From 6b1febaa40efa1e100d6c50c8272c6c4e4c007d7 Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Sun, 17 Jul 2016 21:38:23 -0400 Subject: [PATCH 02/12] Implement file transfer to filesystem, S3, and GCS with database file rename - Comprehensive prompt system to collect all information - Or specify all options in a config file - Options to transfer only Parse hosted files, Parse Server files, or all files - Processes 5 files at a time - Logs progress and errors to console --- config.example.js | 21 ++++ config.example.json | 4 - lib/index.js | 85 +++++++------- lib/questions.js | 142 +++++++++++++++++++++++ lib/transfer.js | 273 ++++++++++++++++++++++++++++++++++++++++++++ package.json | 3 + 6 files changed, 479 insertions(+), 49 deletions(-) create mode 100644 config.example.js delete mode 100644 config.example.json create mode 100644 lib/questions.js create mode 100644 lib/transfer.js diff --git a/config.example.js b/config.example.js new file mode 100644 index 0000000..92f0e72 --- /dev/null +++ b/config.example.js @@ -0,0 +1,21 @@ +module.exports = { + applicationId: "PARSE_APPLICATION_ID", + masterKey: "PARSE_MASTER_KEY", + mongoURL: "mongodb://:@mongourl.com:27017/database_name", + serverURL: "https://api.customparseserver.com/parse", + filesToTransfer: 'parseOnly', + renameInDatabase: true, + + // For filesystem configuration + filesystemPath: './downloaded_files', + + // For S3 configuration + aws_accessKeyId: "ACCESS_KEY_ID", + aws_secretAccessKey: "SECRET_ACCESS_KEY", + aws_bucket: "BUCKET_NAME", + + // For GCS configuration + gcs_projectId: "GCS_PROJECT_ID", + gcs_keyFilename: "credentials.json", + gcs_bucket: "BUCKET_NAME" +}; \ No newline at end of file diff --git a/config.example.json b/config.example.json deleted file mode 100644 index 51e237d..0000000 --- a/config.example.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "applicationId": "61U6Vj3VbzAgotSaSiKJ2Jr4RDIYUtcCCoTbQaJR", - "masterKey": "UeHG7Bpd7OqSwpU5eAeA5TCXeHVPpPCrGLXWqeW2" -} \ No newline at end of file diff --git a/lib/index.js b/lib/index.js index a97a860..302d3ea 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,56 +1,42 @@ -var inquirer = require('inquirer'); var Parse = require('parse/node'); var schemas = require('./schemas'); +var transfer = require('./transfer'); + +var questions = require('./questions.js'); module.exports = initialize; 
function initialize(config) { - var questions = [ - { - type: 'input', - name: 'applicationId', - message: 'The applicationId', - when: !config.applicationId - }, { - type: 'input', - name: 'masterKey', - message: 'The masterKey', - when: !config.masterKey - }, - { - type: 'input', - name: 'serverURL', - message: 'The serverURL', - when: !config.serverURL, - default: 'https://api.parse.com/1' - } - ]; - - inquirer.prompt(questions).then(function (answers) { + questions(config).then(function (answers) { config = Object.assign(config, answers); Parse.initialize(config.applicationId, null, config.masterKey); Parse.serverURL = config.serverURL; - printAllFiles(); + return transfer.init(config).then(function() { + return getAllFileObjects(); + }).then(function(objects) { + return transfer.run(objects); + }).then(function() { + console.log('Complete!'); + process.exit(); + }).catch(function(error) { + console.log(error); + process.exit(1); + }); }); } -function printAllFiles() { - schemas.get().then(function(res){ - var schemasWithFiles = onlyFiles(res); - return Promise.all(schemasWithFiles.map(getFilesFromSchema)); +function getAllFileObjects() { + console.log("Fetching schema..."); + return schemas.get().then(function(res){ + console.log("Fetching all objects with files..."); + var schemasWithFiles = onlyFiles(res); + console.log('swf', schemasWithFiles); + return Promise.all(schemasWithFiles.map(getObjectsWithFilesFromSchema)); }).then(function(results) { - var files = results.reduce(function(c, r) { - return c.concat(r); - }, []); - files.forEach(function(file) { - process.stdout.write(file); - process.stdout.write("\n"); - }); - process.exit(0); - }).catch(function(err){ - process.stderr.write(err); - process.stderr.write("\n"); - process.exit(1); + var files = results.reduce(function(c, r) { + return c.concat(r); + }, []); + return Promise.resolve(files); }); } @@ -85,17 +71,26 @@ function getAllObjects(baseQuery) { return next(0); } -function getFilesFromSchema(schema) { +function getObjectsWithFilesFromSchema(schema) { var query = new Parse.Query(schema.className); query.select(schema.fields); + query.limit(1000); schema.fields.forEach(function(field) { query.exists(field); - }) + }); return getAllObjects(query).then(function(results) { return results.reduce(function(current, result){ - return current.concat(schema.fields.map(function(field){ - return result.get(field).url(); - })) + return current.concat( + schema.fields.map(function(field){ + return { + className: schema.className, + objectId: result.id, + fieldName: field, + fileName: result.get(field).name(), + url: result.get(field).url() + } + }) + ); }, []); }); } \ No newline at end of file diff --git a/lib/questions.js b/lib/questions.js new file mode 100644 index 0000000..79a68d9 --- /dev/null +++ b/lib/questions.js @@ -0,0 +1,142 @@ +/** + * Uses command line prompts to collect necessary info + */ + +var inquirer = require('inquirer'); +module.exports = questions; + +function questions(config) { + return inquirer.prompt([ + // Collect Parse info + { + type: 'input', + name: 'applicationId', + message: 'The applicationId', + when: !config.applicationId + }, { + type: 'input', + name: 'masterKey', + message: 'The masterKey', + when: !config.masterKey + }, { + type: 'input', + name: 'serverURL', + message: 'The Parse serverURL', + when: !config.serverURL, + default: 'https://api.parse.com/1' + }, { + type: 'list', + name: 'filesToTransfer', + message: 'What files would you like to transfer?', + choices: [ + {name: 'Only 
parse.com hosted files', value: 'parseOnly'}, + {name: 'Only Parse Server (self hosted server) files', value: 'parseServerOnly'}, + {name: 'All files', value: 'all'} + ], + when: (['parseOnly','parseServerOnly', 'all'].indexOf(config.filesToTransfer) == -1) + }, { + type: 'confirm', + name: 'renameInDatabase', + message: 'Rename Parse hosted files in the database after transfer?', + default: true, + when: function(answers) { + return !config.renameInDatabase && + (answers.filesToTransfer == 'all' || + answers.filesToTransfer == 'parseOnly'); + } + }, { + type: 'input', + name: 'mongoURL', + message: 'MongoDB URL', + default: 'mongodb://localhost:27017/database', + when: function(answers) { + return (config.renameInDatabase || answers.renameInDatabase) && + !config.mongoURL; + } + }, + + // Where to transfer to + { + type: 'list', + name: 'transferTo', + message: 'Where would you like to transfer files to?', + choices: [ + {name: 'Print List of URLs', value: 'print'}, + {name: 'Local File System', value: 'filesystem'}, + {name: 'AWS S3', value: 's3'}, + {name: 'Google Cloud Storage', value: 'gcs'}, + ], + when: (['print','filesystem','s3','gcs'].indexOf(config.transferTo) == -1) + }, + + // filesystem settings + { + type: 'input', + name: 'filesystemPath', + message: 'Local filesystem path to save files to', + when: function(answers) { + return !config.filesystemPath && + (config.transferTo == 'filesystem' || + answers.transferTo == 'filesystem'); + }, + default: './downloaded_files' + }, + + // S3 settings + { + type: 'input', + name: 'aws_accessKeyId', + message: 'AWS access key id', + when: function(answers) { + return (answers.transferTo == 's3' || config.transferTo == 's3') && + !config.aws_accessKeyId && + !config.aws_profile; + } + }, { + type: 'input', + name: 'aws_secretAccessKey', + message: 'AWS secret access key', + when: function(answers) { + return (answers.transferTo == 's3' || config.transferTo == 's3') && + !config.aws_secretAccessKey && + !config.aws_profile; + } + }, { + type: 'input', + name: 'aws_bucket', + message: 'S3 bucket name', + when: function(answers) { + return (answers.transferTo == 's3' || config.transferTo == 's3') && + !config.aws_bucket; + } + }, + + // GCS settings + { + type: 'input', + name: 'gcs_projectId', + message: 'GCS project id', + when: function(answers) { + return (answers.transferTo == 'gcs' || config.transferTo == 'gcs') && + !config.gcs_projectId; + } + }, { + type: 'input', + name: 'gcs_keyFilename', + message: 'GCS key filename', + when: function(answers) { + return (answers.transferTo == 'gcs' || config.transferTo == 'gcs') && + !config.gcs_keyFilename; + }, + default: 'credentials.json' + }, { + type: 'input', + name: 'gcs_bucket', + message: 'GCS bucket name', + when: function(answers) { + return (answers.transferTo == 'gcs' || config.transferTo == 'gcs') && + !config.gcs_bucket; + } + }, + ]); +} \ No newline at end of file diff --git a/lib/transfer.js b/lib/transfer.js new file mode 100644 index 0000000..5c378c0 --- /dev/null +++ b/lib/transfer.js @@ -0,0 +1,273 @@ +var fs = require('fs'); +var request = require('request'); +var crypto = require('crypto'); +var async = require('async'); +var AWS = require('aws-sdk'); +var GCS = require('gcloud').storage; +var MongoClient = require('mongodb').MongoClient; + +// regex that matches old legacy Parse hosted files +var legacyFilesPrefixRegex = new RegExp("^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}-"); + +var db, config, s3Client, gcsClient; +var 
fileHandlers = { + print: print, + filesystem: filesystem, + s3: s3, + gcs: gcs, +}; + +module.exports.init = init; +module.exports.run = run; + +function init(options) { + console.log('Initializing transfer configuration...'); + config = options; + return new Promise(function(resolve, reject) { + if (config.renameInDatabase) { + console.log('Connecting to MongoDB'); + MongoClient.connect(config.mongoURL, function(error, database) { + if (error) { + return reject(error); + } + console.log('Successfully connected to MongoDB'); + db = database; + _setup().then(resolve, reject); + }); + } else { + _setup().then(resolve, reject); + } + }); +} + +function _setup() { + return new Promise(function(resolve, reject) { + if (config.transferTo == 'print') { + resolve(); + } else if (config.transferTo == 'filesystem') { + console.log('Creating directory at '+config.filesystemPath); + process.umask(0); + fs.mkdir(config.filesystemPath, function() { + resolve(); + }); + } else if (config.transferTo == 's3') { + console.log('Initializing S3 connection') + if (config.aws_accessKeyId && config.aws_secretAccessKey) { + AWS.config.credentials = new AWS.Credentials(config.aws_accessKeyId, config.aws_secretAccessKey); + } else if (config.aws_profile) { + AWS.config.credentials = new AWS.SharedIniFileCredentials({ profile: config.aws_profile }); + } else { + return reject('Must specify profile or accessKeyId and secretAccessKey'); + } + s3Client = new AWS.S3(); + resolve(); + } else if (config.transferTo == 'gcs') { + console.log('Initializing GCS connection') + gcsClient = new GCS({ + projectId: config.gcs_projectId, + keyFilename: config.gcs_keyFilename + }); + resolve(); + } + }); +} + +function run(files) { + console.log('Processing '+files.length+' files'); + console.log('Saving files to '+config.transferTo); + return _processFiles(files, fileHandlers[config.transferTo]); +} + +/** + * Handler that prints url to command line + * @param {Object} file the file info + * @param {Function} callback + */ +function print(file, callback) { + console.log(file.url); + callback(); +} + +/** + * Handler that saves file to filesystem + * @param {Object} file the file info + * @param {Function} callback + */ +function filesystem(file, callback) { + request(file.url).on('error', function(error) { + callback(error); + }).on('response', function(response) { + if (_requestErrorHandler(false, response)) { + return callback(); + } + var ws = fs.createWriteStream(config.filesystemPath+'/'+file.newFileName); + ws.on('error', function(error) { console.log('1', error); }); + this.pipe(ws).on('error', function(error) { + console.log('Failed to write file', error); + }).on('finish', function() { + _changeDBFileField(file, callback); + }); + }); +} + +/** + * Handler that saves file to S3 + * @param {Object} file the file info + * @param {Function} callback + */ +function s3(file, callback) { + request({ + url: file.url, + encoding: null + }, function(error, response, body) { + if (_requestErrorHandler(error, response)) { + return callback(error); + } + + s3Client.putObject({ + Bucket: config.aws_bucket, + Key: file.newFileName, + ACL: 'public-read', + ContentType: response.headers['content-type'], + ContentLength: response.headers['content-length'], + Body: body + }, function(error) { + if (error) { + return callback(error); + } + _changeDBFileField(file, callback); + }); + }); +} + +/** + * Handler that saves file to GCS + * @param {Object} file the file info + * @param {Function} callback + */ +function gcs(file, callback) { + 
request({ + url: file.url, + encoding: null + }, function(error, response, body) { + if (_requestErrorHandler(error, response)) { + return callback(error); + } + + var newFile = gcsClient.bucket(config.gcs_bucket).file(file.fileName); + + var uploadStream = newFile.createWriteStream({ + metadata: { + contentType: response.headers['content-type'] || 'application/octet-stream' + } + }); + uploadStream.on('error', function(error) { + callback(error); + }).on('finish', function() { + // Second call to set public read ACL after object is uploaded. + newFile.makePublic(function(error, res) { + if (error) { + return callback(error); + } + _changeDBFileField(file, callback); + }) + }); + uploadStream.write(body); + uploadStream.end(); + }); +} + +/** + * Handle error from requests + */ +function _requestErrorHandler(error, response) { + if (error) { + return error; + } else if (response.statusCode >= 300) { + console.log('Failed request ('+response.statusCode+') skipping: '+response.request.href); + return true; + } + return false; +} + +/** + * Converts a file into a non Parse file name + * @param {String} fileName + * @return {String} + */ +function _nonParseFileName(fileName) { + if (fileName.indexOf('tfss-') === 0) { + return fileName.replace('tfss-', ''); + } else if (legacyFilesPrefixRegex.test(fileName)) { + var newPrefix = crypto.randomBytes(32/2).toString('hex'); + return newPrefix + fileName.replace(legacyFilesPrefixRegex, ''); + } else { + return fileName; + } +} + +/** + * Loops through 5 files at a time and calls handler + * @param {Array} files Array of files + * @param {Function} handler handler function for file + * @return {Promise} + */ +function _processFiles(files, handler) { + return new Promise(function(resolve, reject) { + async.eachOfLimit(files, 5, function(file, index, callback) { + process.stdout.write('Processing '+(index+1)+'/'+files.length+'\r'); + file.newFileName = _nonParseFileName(file.fileName); + if (_shouldTransferFile(file)) { + handler(file, callback); + } else { + callback(); + } + }, function(error) { + if (error) { + return reject('Error!', error); + } + resolve('\nComplete!'); + }); + }) +} + +/** + * Changes the file name that is saved in MongoDB + * @param {Object} file the file info + * @param {Function} callback + */ +function _changeDBFileField(file, callback) { + if (file.fileName == file.newFileName || !config.renameInDatabase) { + return callback(); + } + var update = {$set:{}}; + update.$set[file.fieldName] = file.newFileName; + db.collection(file.className).update( + { _id : file.objectId }, + update, + function(error, result ) { + callback(error); + } + ); +} + +/** + * Determines if a file should be transferred based on configuration + * @param {Object} file the file info + */ +function _shouldTransferFile(file) { + if (config.filesToTransfer == 'all') { + return true; + } else if ( + config.filesToTransfer == 'parseOnly' && + file.fileName != file.newFileName + ) { + return true; + } else if ( + config.filesToTransfer == 'parseServerOnly' && + file.fileName == file.newFileName + ) { + return true; + } + return false; +} \ No newline at end of file diff --git a/package.json b/package.json index 613f117..debe08a 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,9 @@ }, "homepage": "https://github.com/parse-server-modules/parse-files-utils#readme", "dependencies": { + "async": "^2.0.0", + "aws-sdk": "^2.4.7", + "gcloud": "^0.36.0", "inquirer": "^1.1.2", "parse": "^1.8.5", "request": "^2.72.0" From 3808ea56dd4c82ecd06c681a840b64b8fab2689b 
Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Sun, 17 Jul 2016 21:40:04 -0400 Subject: [PATCH 03/12] Add .DS_Store to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index e920c16..1856faf 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,5 @@ node_modules # Optional REPL history .node_repl_history + +.DS_Store \ No newline at end of file From 63e5bcb659b09b1dfa69dc20dceb6a0e034d3ee7 Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Sun, 17 Jul 2016 21:51:18 -0400 Subject: [PATCH 04/12] Add a final configuration output and confirmation before continuing --- lib/index.js | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/lib/index.js b/lib/index.js index 302d3ea..0be8e28 100644 --- a/lib/index.js +++ b/lib/index.js @@ -1,4 +1,6 @@ var Parse = require('parse/node'); +var inquirer = require('inquirer'); + var schemas = require('./schemas'); var transfer = require('./transfer'); @@ -9,19 +11,31 @@ module.exports = initialize; function initialize(config) { questions(config).then(function (answers) { config = Object.assign(config, answers); + console.log(JSON.stringify(config, null, 2)); + return inquirer.prompt({ + type: 'confirm', + name: 'next', + message: 'About to start the file transfer. Does the above look correct?', + default: true, + }); + }).then(function(answers) { + if (!answers.next) { + console.log('Aborted!'); + process.exit(); + } Parse.initialize(config.applicationId, null, config.masterKey); Parse.serverURL = config.serverURL; - return transfer.init(config).then(function() { - return getAllFileObjects(); - }).then(function(objects) { - return transfer.run(objects); - }).then(function() { - console.log('Complete!'); - process.exit(); - }).catch(function(error) { - console.log(error); - process.exit(1); - }); + return transfer.init(config); + }).then(function() { + return getAllFileObjects(); + }).then(function(objects) { + return transfer.run(objects); + }).then(function() { + console.log('Complete!'); + process.exit(); + }).catch(function(error) { + console.log(error); + process.exit(1); }); } From 3ff93c4ab2666ebd4858d3af80d800e8ef6f620a Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Sun, 17 Jul 2016 21:55:33 -0400 Subject: [PATCH 05/12] Handle queries with skip > 10k --- lib/index.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/index.js b/lib/index.js index 0be8e28..fce1e42 100644 --- a/lib/index.js +++ b/lib/index.js @@ -72,6 +72,10 @@ function onlyFiles(schemas) { function getAllObjects(baseQuery) { var allObjects = []; var next = function(startIndex) { + if (startIndex > 10000) { + baseQuery.greaterThan('createdAt', allObjects[allObjects.length-1].createdAt); + startIndex = 0; + } baseQuery.skip(startIndex); return baseQuery.find({useMasterKey: true}).then(function(r){ allObjects = allObjects.concat(r); @@ -87,7 +91,8 @@ function getAllObjects(baseQuery) { function getObjectsWithFilesFromSchema(schema) { var query = new Parse.Query(schema.className); - query.select(schema.fields); + query.select(schema.fields.concat('createdAt')); + query.ascending('createdAt'); query.limit(1000); schema.fields.forEach(function(field) { query.exists(field); From a5469c29c900cd6e82b7fc2edf9e90ea97aa5328 Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Sun, 17 Jul 2016 22:20:55 -0400 Subject: [PATCH 06/12] Add option for asyncLimit to set number of files processed at the same time --- lib/transfer.js | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/lib/transfer.js b/lib/transfer.js index 5c378c0..0ac9a5d 100644 --- a/lib/transfer.js +++ b/lib/transfer.js @@ -213,8 +213,9 @@ function _nonParseFileName(fileName) { * @return {Promise} */ function _processFiles(files, handler) { + var asyncLimit = config.asyncLimit || 5; return new Promise(function(resolve, reject) { - async.eachOfLimit(files, 5, function(file, index, callback) { + async.eachOfLimit(files, asyncLimit, function(file, index, callback) { process.stdout.write('Processing '+(index+1)+'/'+files.length+'\r'); file.newFileName = _nonParseFileName(file.fileName); if (_shouldTransferFile(file)) { From 3df3d164388bd21c126cdebe6999d54e5688797b Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Sun, 17 Jul 2016 22:29:50 -0400 Subject: [PATCH 07/12] Update README --- README.md | 56 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 51bfde8..67dff72 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,48 @@ # parse-files-utils -Utilities to list and migrate Parse files +Utilities to list and migrate Parse files. -This utility will print in the terminal all the files URL's from the parse server +This utility will do the following: -This can be really useful when you migrate your files and want to move the files from the Parse S3 host to you own. +1. Get all files across all classes in a Parse database. +2. Print file URLs to console OR transfer to S3, GCS, or filesystem. +3. Rename files so that [Parse Server](https://github.com/ParsePlatform/parse-server) no longer detects that they are hosted by Parse. +4. Update MongoDB with new file names. -This utility won't save the files anywhere else. You can save the results to a file or pipe the results to another program: +#### \*WARNING\* +As soon as this script transfers files away from Parse.com hosted files (and renames them in the database) +any clients that use api.parse.com will no longer be able to access the files. +See the section titled "5. Files" in the [Parse Migration Guide](https://parse.com/migration) +and Parse Server [issue #1582](https://github.com/ParsePlatform/parse-server/issues/1582). -## usage +## Installation -``` -$ node index.js -``` +1. Clone the repo: `git clone git@github.com:parse-server-modules/parse-files-utils.git` +2. cd into repo: `cd parse-files-utils` +3. Install dependencies: `npm install` + +## Usage -you can optionally specify a json configuration file +The quickest way to get started is to run `node index.js` and follow the command prompts. +You can optionally specify a js/json configuration file (see [config.example.js](./config.example.js)). ``` -$ node index.js ./config.json +$ node index.js config.js ``` -Example `config.json`: - -``` -{ - "applicationId": "PVpAyhBmNuouwPBNksRLPVpAyhBmNuouwPBNksRL", - "masterKey": "DQzeY9lelKLPeWQH6zhsNUqnrudyOU07hjC6g53a", - "serverURL": "http://parse-server.yourdomain.com/parse" -} -``` \ No newline at end of file +### Available configuration options + +* `applicationId`: Parse application id. +* `masterKey`: Parse master key. +* `mongoURL`: MongoDB connection url. +* `serverURL`: The URL for the Parse server (default: https://api.parse.com/1). +* `filesToTransfer`: Which files to transfer. Accepted options: `parseOnly`, `parseServerOnly`, `all`. +* `renameInDatabase` (boolean): Whether or not to rename files in MongoDB. +* `filesystemPath`: The path/directory to save files to when transferring to filesystem.
+* `aws_accessKeyId`: AWS access key id. +* `aws_secretAccessKey`: AWS secret access key. +* `aws_profile`: AWS credentials profile. Can be specified in lieu of `aws_accessKeyId` and `aws_secretAccessKey`. +* `aws_bucket`: S3 bucket name. +* `gcs_projectId`: GCS project id. +* `gcs_keyFilename`: GCS key filename (ie. `credentials.json`). +* `gcs_bucket`: GCS bucket name. +* `asyncLimit`: The number of files to process at the same time (default: 5). \ No newline at end of file From ad2e5d463a692b0d95212b9ae0b48f302ee0fb69 Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Mon, 18 Jul 2016 23:21:11 -0400 Subject: [PATCH 08/12] Update getAllObjects query to use createdAt for every query No need to handle skip > 10k separately now. --- lib/index.js | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/index.js b/lib/index.js index fce1e42..b3f364d 100644 --- a/lib/index.js +++ b/lib/index.js @@ -71,22 +71,20 @@ function onlyFiles(schemas) { function getAllObjects(baseQuery) { var allObjects = []; - var next = function(startIndex) { - if (startIndex > 10000) { + var next = function() { + if (allObjects.length) { baseQuery.greaterThan('createdAt', allObjects[allObjects.length-1].createdAt); - startIndex = 0; } - baseQuery.skip(startIndex); return baseQuery.find({useMasterKey: true}).then(function(r){ allObjects = allObjects.concat(r); if (r.length == 0) { return Promise.resolve(allObjects); } else { - return next(startIndex+r.length); + return next(); } }); } - return next(0); + return next(); } function getObjectsWithFilesFromSchema(schema) { From 1a259cdf59ac11d0d93d51200c712ae68bdcde19 Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Tue, 19 Jul 2016 08:59:56 -0400 Subject: [PATCH 09/12] Resolve configuration file to absolute path before loading --- index.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/index.js b/index.js index afc725b..37ed6f2 100644 --- a/index.js +++ b/index.js @@ -1,7 +1,10 @@ +var path = require('path'); var configFilePath = process.argv[2]; var config = {}; if (configFilePath) { + configFilePath = path.resolve(configFilePath); + try { config = require(configFilePath); } catch(e) { From c90b2b8d56e1fa996f393c897d0e13583bd1c9f2 Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Tue, 19 Jul 2016 09:22:42 -0400 Subject: [PATCH 10/12] Remove unnecessary logging --- lib/index.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/index.js b/lib/index.js index b3f364d..13233ba 100644 --- a/lib/index.js +++ b/lib/index.js @@ -3,7 +3,6 @@ var inquirer = require('inquirer'); var schemas = require('./schemas'); var transfer = require('./transfer'); - var questions = require('./questions.js'); module.exports = initialize; @@ -44,7 +43,6 @@ function getAllFileObjects() { return schemas.get().then(function(res){ console.log("Fetching all objects with files..."); var schemasWithFiles = onlyFiles(res); - console.log('swf', schemasWithFiles); return Promise.all(schemasWithFiles.map(getObjectsWithFilesFromSchema)); }).then(function(results) { var files = results.reduce(function(c, r) { From c51e32e30a7ddaa6d0a240c4db9567280dac46b5 Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Tue, 19 Jul 2016 09:28:44 -0400 Subject: [PATCH 11/12] Refactor to make use of Parse Server file adapters - File adapter can be passed in through configuration file. - If no filesAdapter specified will load fs, s3, or gcs adapter based on prompts. - Also updated callback based functions to return a promise. 
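For illustration, the whole transfer can be driven from a config file that supplies the adapter directly (a minimal sketch that mirrors the commented-out examples in config.example.js below; any adapter exposing a createFile function should work):

    // illustrative config.js -- mirrors config.example.js from this patch
    var S3Adapter = require('parse-server-s3-adapter');
    module.exports = {
      applicationId: "PARSE_APPLICATION_ID",
      masterKey: "PARSE_MASTER_KEY",
      serverURL: "https://api.customparseserver.com/parse",
      filesToTransfer: 'parseOnly',
      filesAdapter: new S3Adapter({
        accessKey: 'ACCESS_KEY_ID',
        secretKey: 'SECRET_ACCESS_KEY',
        bucket: 'BUCKET_NAME'
      })
    };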
--- README.md | 9 +- config.example.js | 23 ++++ lib/questions.js | 9 +- lib/transfer.js | 234 ++++++++++++++++------------------ package.json | 5 +- 5 files changed, 114 insertions(+), 166 deletions(-) diff --git a/README.md b/README.md index 67dff72..7366bfa 100644 --- a/README.md +++ b/README.md @@ -22,11 +22,11 @@ and Parse Server [issue #1582](https://github.com/ParsePlatform/parse-server/iss ## Usage -The quickest way to get started is to run `node index.js` and follow the command prompts. +The quickest way to get started is to run `npm start` and follow the command prompts. You can optionally specify a js/json configuration file (see [config.example.js](./config.example.js)). ``` -$ node index.js config.js +$ npm start config.js ``` ### Available configuration options @@ -37,10 +37,13 @@ $ node index.js config.js * `applicationId`: Parse application id. * `masterKey`: Parse master key. * `mongoURL`: MongoDB connection url. * `serverURL`: The URL for the Parse server (default: https://api.parse.com/1). * `filesToTransfer`: Which files to transfer. Accepted options: `parseOnly`, `parseServerOnly`, `all`. * `renameInDatabase` (boolean): Whether or not to rename files in MongoDB. +* `filesAdapter`: A Parse Server file adapter with a function for `createFile(filename, data)` +(ie. [parse-server-fs-adapter](https://github.com/parse-server-modules/parse-server-fs-adapter), +[parse-server-s3-adapter](https://github.com/parse-server-modules/parse-server-s3-adapter), +[parse-server-gcs-adapter](https://github.com/parse-server-modules/parse-server-gcs-adapter)). * `filesystemPath`: The path/directory to save files to when transferring to filesystem. * `aws_accessKeyId`: AWS access key id. * `aws_secretAccessKey`: AWS secret access key. -* `aws_profile`: AWS credentials profile. Can be specified in lieu of `aws_accessKeyId` and `aws_secretAccessKey`. * `aws_bucket`: S3 bucket name. * `gcs_projectId`: GCS project id. * `gcs_keyFilename`: GCS key filename (ie. `credentials.json`).
diff --git a/config.example.js b/config.example.js index 92f0e72..07ee13a 100644 --- a/config.example.js +++ b/config.example.js @@ -1,10 +1,14 @@ +var FileAdapter = require('parse-server-fs-adapter'); +var S3Adapter = require('parse-server-s3-adapter'); +var GCSAdapter = require('parse-server-gcs-adapter'); + module.exports = { applicationId: "PARSE_APPLICATION_ID", masterKey: "PARSE_MASTER_KEY", mongoURL: "mongodb://:@mongourl.com:27017/database_name", serverURL: "https://api.customparseserver.com/parse", filesToTransfer: 'parseOnly', - renameInDatabase: true, + // renameInDatabase: true, // For filesystem configuration filesystemPath: './downloaded_files', @@ -17,5 +21,20 @@ module.exports = { // For GCS configuration gcs_projectId: "GCS_PROJECT_ID", gcs_keyFilename: "credentials.json", - gcs_bucket: "BUCKET_NAME" + gcs_bucket: "BUCKET_NAME", + + // Or set filesAdapter to a Parse Server file adapter + // filesAdapter: new FileAdapter({ + // filesSubDirectory: './downloaded_files' + // }), + // filesAdapter: new S3Adapter({ + // accessKey: 'ACCESS_KEY_ID', + // secretKey: 'SECRET_ACCESS_KEY', + // bucket: 'BUCKET_NAME' + // }), + // filesAdapter: new GCSAdapter({ + // projectId: "GCS_PROJECT_ID", + // keyFilename: "credentials.json", + // bucket: "BUCKET_NAME", + // }), }; \ No newline at end of file diff --git a/lib/questions.js b/lib/questions.js index 79a68d9..016b76d 100644 --- a/lib/questions.js +++ b/lib/questions.js @@ -41,8 +41,8 @@ function questions(config) { default: true, when: function(answers) { return !config.renameInDatabase && - (answers.filesToTransfer == 'all' || - answers.filesToTransfer == 'parseOnly'); + (answers.filesToTransfer == 'all' || config.filesToTransfer == 'all' || + config.filesToTransfer == 'parseOnly' || answers.filesToTransfer == 'parseOnly'); } }, { type: 'input', @@ -66,7 +66,10 @@ function questions(config) { {name: 'AWS S3', value: 's3'}, {name: 'Google Cloud Storage', value: 'gcs'}, ], - when: (['print','filesystem','s3','gcs'].indexOf(config.transferTo) == -1) + when: function() { + return (['print','filesystem','s3','gcs'].indexOf(config.transferTo) == -1) && + !config.filesAdapter + } }, // filesystem settings diff --git a/lib/transfer.js b/lib/transfer.js index 0ac9a5d..acecaab 100644 --- a/lib/transfer.js +++ b/lib/transfer.js @@ -1,21 +1,15 @@ -var fs = require('fs'); var request = require('request'); var crypto = require('crypto'); var async = require('async'); -var AWS = require('aws-sdk'); -var GCS = require('gcloud').storage; +var FilesystemAdapter = require('parse-server-fs-adapter'); +var S3Adapter = require('parse-server-s3-adapter'); +var GCSAdapter = require('parse-server-gcs-adapter'); var MongoClient = require('mongodb').MongoClient; // regex that matches old legacy Parse hosted files var legacyFilesPrefixRegex = new RegExp("^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}-"); -var db, config, s3Client, gcsClient; -var fileHandlers = { - print: print, - filesystem: filesystem, - s3: s3, - gcs: gcs, -}; +var db, config; module.exports.init = init; module.exports.run = run; @@ -41,140 +35,40 @@ function init(options) { } function _setup() { - return new Promise(function(resolve, reject) { - if (config.transferTo == 'print') { - resolve(); - } else if (config.transferTo == 'filesystem') { - console.log('Creating directory at '+config.filesystemPath); - process.umask(0); - fs.mkdir(config.filesystemPath, function() { - resolve(); - }); - } else if (config.transferTo == 's3') { - console.log('Initializing S3 
connection') - if (config.aws_accessKeyId && config.aws_secretAccessKey) { - AWS.config.credentials = new AWS.Credentials(config.aws_accessKeyId, config.aws_secretAccessKey); - } else if (config.aws_profile) { - AWS.config.credentials = new AWS.SharedIniFileCredentials({ profile: config.aws_profile }); - } else { - return reject('Must specify profile or accessKeyId and secretAccessKey'); - } - s3Client = new AWS.S3(); - resolve(); - } else if (config.transferTo == 'gcs') { - console.log('Initializing GCS connection') - gcsClient = new GCS({ - projectId: config.gcs_projectId, - keyFilename: config.gcs_keyFilename - }); - resolve(); - } - }); -} - -function run(files) { - console.log('Processing '+files.length+' files'); - console.log('Saving files to '+config.transferTo); - return _processFiles(files, fileHandlers[config.transferTo]); -} - -/** - * Handler that prints url to command line - * @param {Object} file the file info - * @param {Function} callback - */ -function print(file, callback) { - console.log(file.url); - callback(); -} - -/** - * Handler that saves file to filesystem - * @param {Object} file the file info - * @param {Function} callback - */ -function filesystem(file, callback) { - request(file.url).on('error', function(error) { - callback(error); - }).on('response', function(response) { - if (_requestErrorHandler(false, response)) { - return callback(); - } - var ws = fs.createWriteStream(config.filesystemPath+'/'+file.newFileName); - ws.on('error', function(error) { console.log('1', error); }); - this.pipe(ws).on('error', function(error) { - console.log('Failed to write file', error); - }).on('finish', function() { - _changeDBFileField(file, callback); + config.adapterName = config.transferTo || config.filesAdapter.constructor.name; + console.log('Initializing '+config.adapterName+' adapter'); + if (config.filesAdapter && config.filesAdapter.createFile) { + return Promise.resolve(); + } else if (config.transferTo == 'print') { + return Promise.resolve(); + } else if (config.transferTo == 'filesystem') { + config.filesAdapter = new FilesystemAdapter({ + filesSubDirectory: config.filesystemPath }); - }); -} - -/** - * Handler that saves file to S3 - * @param {Object} file the file info - * @param {Function} callback - */ -function s3(file, callback) { - request({ - url: file.url, - encoding: null - }, function(error, response, body) { - if (_requestErrorHandler(error, response)) { - return callback(error); - } - - s3Client.putObject({ - Bucket: config.aws_bucket, - Key: file.newFileName, - ACL: 'public-read', - ContentType: response.headers['content-type'], - ContentLength: response.headers['content-length'], - Body: body - }, function(error) { - if (error) { - return callback(error); - } - _changeDBFileField(file, callback); + } else if (config.transferTo == 's3') { + config.filesAdapter = new S3Adapter({ + accessKey: config.aws_accessKeyId, + secretKey: config.aws_secretAccessKey, + bucket: config.aws_bucket, + directAccess: true }); - }); + } else if (config.transferTo == 'gcs') { + config.filesAdapter = new GCSAdapter({ + projectId: config.gcs_projectId, + keyFilename: config.gcs_keyFilename, + bucket: config.gcs_bucket, + directAccess: true + }); + } else { + return Promise.reject('Invalid files adapter'); + } + return Promise.resolve(); } -/** - * Handler that saves file to GCS - * @param {Object} file the file info - * @param {Function} callback - */ -function gcs(file, callback) { - request({ + request({ url: file.url, encoding: null }, function(error, response, body)
{ - if (_requestErrorHandler(error, response)) { - return callback(error); - } - - var newFile = gcsClient.bucket(config.gcs_bucket).file(file.fileName); - - var uploadStream = newFile.createWriteStream({ - metadata: { - contentType: response.headers['content-type'] || 'application/octet-stream' - } - }); - uploadStream.on('error', function(error) { - callback(error); - }).on('finish', function() { - // Second call to set public read ACL after object is uploaded. - newFile.makePublic(function(error, res) { - if (error) { - return callback(error); - } - _changeDBFileField(file, callback); - }) - }); - uploadStream.write(body); - uploadStream.end(); - }); +function run(files) { + console.log('Processing '+files.length+' files'); + console.log('Saving files with '+config.adapterName); + return _processFiles(files); } /** @@ -207,7 +101,7 @@ function _nonParseFileName(fileName) { } /** - * Loops through 5 files at a time and calls handler + * Loops through n files at a time and calls handler * @param {Array} files Array of files * @param {Function} handler handler function for file * @return {Promise} */ @@ -219,13 +113,13 @@ function _processFiles(files, handler) { process.stdout.write('Processing '+(index+1)+'/'+files.length+'\r'); file.newFileName = _nonParseFileName(file.fileName); if (_shouldTransferFile(file)) { - handler(file, callback); + _transferFile(file).then(callback, callback); } else { callback(); } }, function(error) { if (error) { - return reject('Error!', error); + return reject('\nError!', error); } resolve('\nComplete!'); }); @@ -235,21 +129,25 @@ function _processFiles(files, handler) { /** * Changes the file name that is saved in MongoDB * @param {Object} file the file info - * @param {Function} callback */ -function _changeDBFileField(file, callback) { - if (file.fileName == file.newFileName || !config.renameInDatabase) { - return callback(); - } - var update = {$set:{}}; - update.$set[file.fieldName] = file.newFileName; - db.collection(file.className).update( - { _id : file.objectId }, - update, - function(error, result ) { - callback(error); +function _changeDBFileField(file) { + return new Promise(function(resolve, reject) { + if (file.fileName == file.newFileName || !config.renameInDatabase) { + return resolve(); } - ); + var update = {$set:{}}; + update.$set[file.fieldName] = file.newFileName; + db.collection(file.className).update( + { _id : file.objectId }, + update, + function(error, result ) { + if (error) { + return reject(error); + } + resolve(); + } + ); + }); } /** @@ -271,4 +169,28 @@ function _shouldTransferFile(file) { return true; } return false; +} + +/** + * Request file from URL and upload with filesAdapter + * @param {Object} file the file info object + */ +function _transferFile(file) { + return new Promise(function(resolve, reject) { + if (config.transferTo == 'print') { + console.log(file.url); + // Use process.nextTick to avoid max call stack error + return process.nextTick(resolve); + } + request({ url: file.url, encoding: null }, function(error, response, body) { + if (_requestErrorHandler(error, response)) { + return reject(error); + } + config.filesAdapter.createFile( + file.newFileName, body, response.headers['content-type'] + ).then(function() { + return _changeDBFileField(file); + }).then(resolve, reject); + }); + }); } \ No newline at end of file diff --git a/package.json b/package.json index debe08a..9fd53f7 100644 --- a/package.json +++ b/package.json @@ -24,10 +24,11 @@ "homepage":
"https://github.com/parse-server-modules/parse-files-utils#readme", "dependencies": { "async": "^2.0.0", - "aws-sdk": "^2.4.7", - "gcloud": "^0.36.0", "inquirer": "^1.1.2", "parse": "^1.8.5", + "parse-server-fs-adapter": "^1.0.0", + "parse-server-gcs-adapter": "^1.0.0", + "parse-server-s3-adapter": "^1.0.4", "request": "^2.72.0" } } From 1b1abda065355674cd5cad4df1f9f47e5b455b93 Mon Sep 17 00:00:00 2001 From: Jeremy Pease Date: Tue, 19 Jul 2016 12:52:37 -0400 Subject: [PATCH 12/12] Set default for renameInDatabase to false --- config.example.js | 2 +- lib/questions.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config.example.js b/config.example.js index 07ee13a..7ceaeab 100644 --- a/config.example.js +++ b/config.example.js @@ -8,7 +8,7 @@ module.exports = { mongoURL: "mongodb://:@mongourl.com:27017/database_name", serverURL: "https://api.customparseserver.com/parse", filesToTransfer: 'parseOnly', - // renameInDatabase: true, + renameInDatabase: false, // For filesystem configuration filesystemPath: './downloaded_files', diff --git a/lib/questions.js b/lib/questions.js index 016b76d..3e92a11 100644 --- a/lib/questions.js +++ b/lib/questions.js @@ -38,7 +38,7 @@ function questions(config) { type: 'confirm', name: 'renameInDatabase', message: 'Rename Parse hosted files in the database after transfer?', - default: true, + default: false, when: function(answers) { return !config.renameInDatabase && (answers.filesToTransfer == 'all' || config.filesToTransfer == 'all' ||