* Lots of improvements to info extraction system

* MUCH faster browsing in file base
* Better release year estimation
* New tool: exiftool2desc
This commit is contained in:
Bryan Ashby 2017-05-19 18:41:13 -06:00
parent b8595f667d
commit 54a7f4f774
8 changed files with 226 additions and 80 deletions

View file

@ -23,6 +23,7 @@ const paths = require('path');
const temptmp = require('temptmp').createTrackedSession('file_area');
const iconv = require('iconv-lite');
const exec = require('child_process').exec;
const moment = require('moment');
exports.isInternalArea = isInternalArea;
exports.getAvailableFileAreas = getAvailableFileAreas;
@ -214,7 +215,7 @@ function attemptSetEstimatedReleaseDate(fileEntry) {
const patterns = Config.fileBase.yearEstPatterns.map( p => new RegExp(p, 'gmi'));
function getMatch(input) {
if(input) {
if(input) {
let m;
for(let i = 0; i < patterns.length; ++i) {
m = patterns[i].exec(input);
@ -228,6 +229,10 @@ function attemptSetEstimatedReleaseDate(fileEntry) {
//
// We attempt detection in short -> long order
//
// Throw out anything that is current_year + 2 (we give some leway)
// with the assumption that must be wrong.
//
const maxYear = moment().add(2, 'year').year();
const match = getMatch(fileEntry.desc) || getMatch(fileEntry.descLong);
if(match && match[1]) {
let year;
@ -244,7 +249,7 @@ function attemptSetEstimatedReleaseDate(fileEntry) {
year = parseInt(match[1]);
}
if(year) {
if(year && year <= maxYear) {
fileEntry.meta.est_release_year = year;
}
}
@ -394,6 +399,20 @@ function populateFileEntryWithArchive(fileEntry, filePath, stepInfo, iterator, c
);
}
function getInfoExtractUtilForDesc(mimeType, descType) {
let util = _.get(Config, [ 'fileTypes', mimeType, `${descType}DescUtil` ]);
if(!_.isString(util)) {
return;
}
util = _.get(Config, [ 'infoExtractUtils', util ]);
if(!util || !_.isString(util.cmd)) {
return;
}
return util;
}
function populateFileEntryNonArchive(fileEntry, filePath, stepInfo, iterator, cb) {
async.series(
@ -408,27 +427,42 @@ function populateFileEntryNonArchive(fileEntry, filePath, stepInfo, iterator, cb
return callback(null);
}
const shortDescUtil = _.get(Config, [ 'fileTypes', mimeType, 'shortDescUtil' ]);
if(!shortDescUtil || !shortDescUtil.cmd) {
return callback(null);
}
const args = (shortDescUtil.args || [ '{filePath} '] ).map( arg => stringFormat(arg, { filePath : filePath } ) );
exec(`${shortDescUtil.cmd} ${args.join(' ')}`, (err, stdout) => {
if(err) {
logDebug(
{ error : err.message, cmd : shortDescUtil.cmd, args : args },
'Short description command failed'
);
} else {
//
// "...no more than 45 characters long" -- FILE_ID.DIZ v1.9 spec:
// http://www.textfiles.com/computers/fileid.txt
//
fileEntry.desc = (wordWrapText( (stdout || '').trim(), { width : 45 } ).wrapped || []).join('\n');
async.eachSeries( [ 'short', 'long' ], (descType, nextDesc) => {
const util = getInfoExtractUtilForDesc(mimeType, descType);
if(!util) {
return nextDesc(null);
}
const args = (util.args || [ '{filePath} '] ).map( arg => stringFormat(arg, { filePath : filePath } ) );
exec(`${util.cmd} ${args.join(' ')}`, (err, stdout) => {
if(err) {
logDebug(
{ error : err.message, cmd : util.cmd, args : args },
`${_.upperFirst(descType)} description command failed`
);
} else {
stdout = (stdout || '').trim();
if(stdout.length > 0) {
const key = 'short' === descType ? 'desc' : 'descLong';
if('desc' === key) {
//
// Word wrap short descriptions to FILE_ID.DIZ spec
//
// "...no more than 45 characters long"
//
// See http://www.textfiles.com/computers/fileid.txt
//
stdout = (wordWrapText( stdout, { width : 45 } ).wrapped || []).join('\n');
}
fileEntry[key] = stdout;
}
}
return nextDesc(null);
});
}, () => {
return callback(null);
});
},