perf(architecture): big performance improvements on large databases and highlighting of matched files in search (#63)

Reduces database disk usage by more than 50%. Bumps the database version to v7.

BREAKING CHANGE: the v6 and v7 database formats are incompatible, and the migration can take a very long time (many hours, or even days, on very large databases)
Alexey Kasyanchuk 2018-11-11 23:44:10 +03:00 committed by GitHub
parent 13ca63b954
commit 6afe85798a
11 changed files with 271 additions and 111 deletions
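
At the heart of the change, inferred from the diffs below: instead of one files-table row per file, all files of a torrent are merged into a single row whose path and size columns hold newline-joined lists. A minimal sketch of the packing scheme; packFiles/unpackFiles are hypothetical names, the field names come from the diffs:

// Pack: one row per torrent, columns are newline-joined lists.
const packFiles = (filesList) => ({
    path: filesList.map(f => f.path).join('\n'),
    size: filesList.map(f => f.size).join('\n')
})

// Unpack: split the columns back into a per-file list.
const unpackFiles = (row) => {
    const sizes = row.size.split('\n')
    return row.path.split('\n').map((path, i) => ({path, size: parseInt(sizes[i])}))
}

Storing one row per torrent is presumably what cuts index size and speeds up big databases: Sphinx keeps far fewer documents, and a torrent's file list comes back in a single row fetch.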

package-lock.json (generated, 30 changed lines)

@@ -1,6 +1,6 @@
{
"name": "rats-search",
"version": "0.30.0",
"version": "0.30.1",
"lockfileVersion": 1,
"requires": true,
"dependencies": {
@@ -8541,12 +8541,14 @@
"balanced-match": {
"version": "1.0.0",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"brace-expansion": {
"version": "1.1.11",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
@@ -8561,17 +8563,20 @@
"code-point-at": {
"version": "1.1.0",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"concat-map": {
"version": "0.0.1",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"console-control-strings": {
"version": "1.1.0",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"core-util-is": {
"version": "1.0.2",
@@ -8688,7 +8693,8 @@
"inherits": {
"version": "2.0.3",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"ini": {
"version": "1.3.5",
@@ -8700,6 +8706,7 @@
"version": "1.0.0",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"number-is-nan": "^1.0.0"
}
@@ -8714,6 +8721,7 @@
"version": "3.0.4",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"brace-expansion": "^1.1.7"
}
@@ -8721,12 +8729,14 @@
"minimist": {
"version": "0.0.8",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"minipass": {
"version": "2.2.4",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"safe-buffer": "^5.1.1",
"yallist": "^3.0.0"
@@ -8745,6 +8755,7 @@
"version": "0.5.1",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"minimist": "0.0.8"
}
@@ -8825,7 +8836,8 @@
"number-is-nan": {
"version": "1.0.1",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"object-assign": {
"version": "4.1.1",
@@ -8837,6 +8849,7 @@
"version": "1.4.0",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"wrappy": "1"
}
@@ -8958,6 +8971,7 @@
"version": "1.0.2",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"code-point-at": "^1.0.0",
"is-fullwidth-code-point": "^1.0.0",


@@ -320,7 +320,17 @@ export default class Torrent extends Component {
torrent.path && torrent.path.length > 0
?
torrent.path.map((path, index) => {
return <div key={index} className='break-word fs0-75' style={{paddingTop: '0.3em', marginLeft: '0.6em'}}>{path}</div>
const boldRe = /\<b\>(.+?)\<\/b\>/g;
let boldText;
let newPath = [];
let idx = 0;
while ((boldText = boldRe.exec(path)) !== null) {
newPath.push(<span>{path.substring(idx, boldText.index)}</span>);
newPath.push(<b>{boldText[1]}</b>);
idx = boldRe.lastIndex;
}
newPath.push(<span>{path.substring(idx, path.length)}</span>);
return <div key={index} className='break-word fs0-75' style={{paddingTop: '0.3em', marginLeft: '0.6em'}}>{newPath}</div>
})
:
null
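
The loop above renders Sphinx highlight markers: search results now arrive with matches wrapped in <b></b>, and the component splits each path string into plain and bold React nodes. A hypothetical standalone version of the same parser; it also adds key props, which the committed loop omits (React warns about keyless sibling elements):

// Hypothetical helper equivalent to the loop above.
const renderHighlighted = (path) => {
    const boldRe = /<b>(.+?)<\/b>/g
    const nodes = []
    let match
    let idx = 0
    while ((match = boldRe.exec(path)) !== null) {
        nodes.push(<span key={'s' + idx}>{path.substring(idx, match.index)}</span>)
        nodes.push(<b key={'b' + match.index}>{match[1]}</b>)
        idx = boldRe.lastIndex
    }
    nodes.push(<span key={'s' + idx}>{path.substring(idx)}</span>)
    return nodes
}

So renderHighlighted('movies/<b>ubuntu</b>.iso') yields [<span>movies/</span>, <b>ubuntu</b>, <span>.iso</span>].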


@@ -1,11 +1,12 @@
const ipaddr = require('ipaddr.js');
const forBigTable = require('./forBigTable')
const compareVersions = require('compare-versions');
const getTorrent = require('./gettorrent')
const getTorrent = require('./getTorrent')
const _ = require('lodash')
const asyncForEach = require('./asyncForEach')
const cpuUsage = require('./bt/cpu-usage-global')
const magnetParse = require('./magnetParse')
const parseTorrentFiles = require('./parsetTorrentFiles')
module.exports = async ({
sphinx,
@@ -125,14 +126,14 @@ module.exports = async ({
let result = {torrents: rows[0].torrents || 0, size: rows[0].sz || 0}
sphinx.query('SELECT count(*) AS files FROM `files`', function (error, rows, fields) {
sphinx.query('SELECT sum(files) AS flist FROM `torrents`', function (error, rows, fields) {
if(!rows) {
logTE('statistic', error)
callback(undefined)
return;
}
result.files = rows[0].files || 0
result.files = (rows[0] && rows[0].flist) || 0
callback(result)
})
@@ -183,7 +184,7 @@ module.exports = async ({
if(options.files)
{
torrent.filesList = await sphinx.query('SELECT * FROM `files` WHERE `hash` = ? LIMIT 50000', hash);
torrent.filesList = parseTorrentFiles(await sphinx.query('SELECT * FROM `files` WHERE `hash` = ?', hash));
callback(baseRowData(torrent))
}
else
@@ -239,17 +240,10 @@ module.exports = async ({
}
const inSql = Object.keys(hashes).map(hash => sphinx.escape(hash)).join(',');
sphinxSingle.query(`SELECT * FROM files WHERE hash IN(${inSql}) limit 50000`, (error, files) => {
if(!files)
{
files = []
}
files.forEach((file) => {
if(!hashes[file.hash].filesList)
hashes[file.hash].filesList = []
hashes[file.hash].filesList.push(file)
})
sphinxSingle.query(`SELECT * FROM files WHERE hash IN(${inSql})`, (error, files) => {
for(const file of files)
hashes[file.hash].filesList = parseTorrentFiles(file);
callback(Object.values(hashes))
})
})
@@ -403,70 +397,39 @@ module.exports = async ({
const index = navigation.index || 0;
const limit = navigation.limit || 10;
let args = [text, index, limit];
let args = [text, text, index, limit];
const orderBy = navigation.orderBy;
let order = '';
let where = '';
/*
if(orderBy && orderBy.length > 0)
{
const orderDesc = navigation.orderDesc ? 'DESC' : 'ASC';
args.splice(1, 0, orderBy);
order = 'ORDER BY ?? ' + orderDesc;
}
*/
/*
if(safeSearch)
{
where += " and contentCategory != 'xxx' ";
}
if(navigation.type && navigation.type.length > 0)
{
where += ' and contentType = ' + sphinx.escape(navigation.type) + ' ';
}
if(navigation.size)
{
if(navigation.size.max > 0)
where += ' and torrentSize < ' + sphinx.escape(navigation.size.max) + ' ';
if(navigation.size.min > 0)
where += ' and torrentSize > ' + sphinx.escape(navigation.size.min) + ' ';
}
if(navigation.files)
{
if(navigation.files.max > 0)
where += ' and files < ' + sphinx.escape(navigation.files.max) + ' ';
if(navigation.files.min > 0)
where += ' and files > ' + sphinx.escape(navigation.files.min) + ' ';
}
*/
let search = {};
//args.splice(orderBy && orderBy.length > 0 ? 1 : 0, 1);
//sphinx.query('SELECT * FROM `files` inner join torrents on(torrents.hash = files.hash) WHERE files.path like \'%' + text + '%\' ' + where + ' ' + order + ' LIMIT ?,?', args, function (error, rows, fields) {
sphinx.query('SELECT * FROM `files` WHERE MATCH(?) ' + where + ' ' + order + ' LIMIT ?,?', args, function (error, files, fields) {
if(!files) {
logT('search', error)
sphinx.query('SELECT *, SNIPPET(path, ?, \'around=100\', \'force_all_words=1\') as snippet FROM `files` WHERE MATCH(?) ' + where + ' ' + order + ' LIMIT ?,?', args, function (error, searchTorrents) {
if(!searchTorrents) {
logTE('search', error)
callback(undefined)
return;
}
if(files.length === 0)
if(searchTorrents.length === 0)
{
logT('search', 'no torrents found for file search');
callback(undefined)
return;
}
for(const file of files)
for(const torrent of searchTorrents)
{
if(!search[file.hash])
if(!search[torrent.hash])
{
search[file.hash] = { path: [] }
search[torrent.hash] = { path: [] }
}
for(const file of torrent.snippet.split('\n').filter(text => text.includes('<b>')).slice(0, 25))
{
search[torrent.hash].path.push(file)
}
search[file.hash].path.push(file.path)
}
const inSql = Object.keys(search).map(hash => sphinx.escape(hash)).join(',');
sphinx.query(`SELECT * FROM torrents WHERE hash IN(${inSql})`, (err, torrents) => {
if(!torrents) {
logT('search', err)
logTE('search', err)
return;
}
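
The rewritten file search above issues a single MATCH query against the merged rows and lets Sphinx build a SNIPPET() over the newline-joined path column, then keeps only the lines that contain a highlight. An illustration with assumed data:

// Post-processing of a SNIPPET() result (assumed example value).
const snippet = 'docs/readme.txt\nmovies/<b>ubuntu</b>-18.04.iso\nmusic/song.mp3'
const highlightedPaths = snippet
    .split('\n')
    .filter(line => line.includes('<b>')) // keep only files that matched
    .slice(0, 25)                         // cap matches shown per torrent
// highlightedPaths => ['movies/<b>ubuntu</b>-18.04.iso']

This replaces the old flow of fetching every matching file row and joining it against torrents, which is presumably where much of the search speedup comes from.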


@@ -179,6 +179,10 @@ app.on("ready", async () => {
height: 600
});
// Needed by the db patcher so it can close the application
if(stop)
mainWindow.appClose = stop;
dbPatcher(() => {
changeLanguage(appConfig.language, () => setApplicationMenu())


@@ -8,11 +8,10 @@ const glob = require("glob")
const asyncForEach = require('./asyncForEach')
const {torrentTypeDetect} = require('../app/content');
const getTorrent = require('./gettorrent')
const startSphinx = require('./sphinx')
const currentVersion = 6
const currentVersion = 7
module.exports = async (callback, mainWindow, sphinxApp) => {
@@ -26,7 +25,7 @@ module.exports = async (callback, mainWindow, sphinxApp) => {
}
let patchWindow;
const openPatchWindow = () => {
const openPatchWindow = (closable = false) => {
if(patchWindow)
return
@@ -36,10 +35,12 @@ module.exports = async (callback, mainWindow, sphinxApp) => {
if(mainWindow)
mainWindow.hide()
patchWindow = new BrowserWindow({width: 800, height: 400, closable: false})
patchWindow = new BrowserWindow({width: 800, height: 400, closable})
patchWindow.setMenu(null)
patchWindow.on('close', () => mainWindow.appClose())
patchWindow.loadURL("data:text/html;charset=utf-8," + encodeURI(`
<html>
<head><title>Database patching...</title></head>
@@ -73,12 +74,24 @@ module.exports = async (callback, mainWindow, sphinxApp) => {
}
#one {
padding: 20px;
}
#long {
font-size: 0.8em;
padding: 10px;
}
#canBreak {
font-size: 0.8em;
padding: 10px;
}
</style>
<script>
const {ipcRenderer} = require('electron')
ipcRenderer.on('reindex', (e, data) =>{
document.getElementById('one').innerHTML = \`Updating \${data.torrent ? 'torrent': 'file'} \${data.index} of \${data.all} [\${data.field} index]\`
if(data.longTime)
document.getElementById('long').innerHTML = 'This patch can take a long time, possibly several hours. Feel free to have a cup of tea while the database is being patched.'
if(data.canBreak)
document.getElementById('canBreak').innerHTML = 'You can safely interrupt this patch and continue later; it will resume where it left off.'
})
ipcRenderer.on('optimize', (e, data) =>{
document.getElementById('one').innerHTML = \`Optimization for \${data.field}...\`
@@ -95,7 +108,9 @@ module.exports = async (callback, mainWindow, sphinxApp) => {
c4.416,0.15,17.979,1.621,17.683-4.273c-0.292-5.897-11.491-3.241-13.854-6.487c-2.359-3.234-10.023-15.504-7.366-21.104
c2.65-5.59,12.674-21.229,24.463-22.988c11.789-1.777,42.451,7.361,47.459,0c5.012-7.372-6.783-11.512-15.918-28.611
C243.779,80.572,238.768,71.728,220.195,71.427z"/>
<div id="one"></div>
<div id="long"></div>
<div id="canBreak"></div>
</svg>
</body>
</html>
@@ -279,7 +294,7 @@ module.exports = async (callback, mainWindow, sphinxApp) => {
{
delete torrent.contentcategory
delete torrent.contenttype
torrent = await getTorrent(sphinx, null, torrent) // get files
torrent.filesList = (await sphinx.query(`SELECT * FROM files WHERE hash = '${torrent.hash}'`)) || []
torrentTypeDetect(torrent, torrent.filesList)
if(torrent.contentType == 'bad')
{
@@ -307,6 +322,133 @@ module.exports = async (callback, mainWindow, sphinxApp) => {
await rebuildTorrentsFull()
await setVersion(6)
}
case 6:
{
openPatchWindow(true)
logT('patcher', 'merging all files in db patch');
let filesMap = {}
let newId = 0;
let fileIndex = 0;
let fileIndexChecked = 0;
let count = (await sphinx.query("select count(*) as cnt from files where size > 0"))[0].cnt;
if(patchWindow)
patchWindow.webContents.send('reindex', {field: 'calculate', index: 'calculate', all: count, longTime: true, canBreak: true})
// find the next free id
try {
const maxNotPatched = (await sphinx.query("select min(id) as cnt from files where size > 0"))[0].cnt;
newId = (await sphinx.query(`select max(id) as cnt from files where id < ${maxNotPatched}`))[0].cnt | 0;
if(newId <= 0) {
logTE('patcher', 'old id not found');
newId = 0;
}
} catch(e) {
newId = 0;
}
newId++;
logT('patcher', 'found newId', newId);
logT('patcher', 'perform optimization');
sphinx.query(`OPTIMIZE INDEX files`)
await sphinxApp.waitOptimized('files')
const descFiles = await sphinx.query(`desc files`);
let isSizeNewExists = false;
let isSizeAlreadyPatched = false;
descFiles.forEach(({Field, Type}) => {
if(Field == 'size_new')
isSizeNewExists = true;
if(Field == 'size' && Type == 'string')
isSizeAlreadyPatched = true;
});
if(!isSizeNewExists)
await sphinx.query("alter table files add column `size_new` string");
else
logT('patcher', 'size_new already exists, skip');
const fileMapWorker = async (keys) => {
let hashCount = 0;
for(let hash of keys)
{
if(filesMap[hash].length == 0)
continue;
fileIndex++;
for(let i = 1; i < filesMap[hash].length; i++)
{
fileIndex++;
filesMap[hash][0].path += '\n' + filesMap[hash][i].path;
filesMap[hash][0].size += '\n' + filesMap[hash][i].size;
}
await sphinx.query(`DELETE FROM files WHERE hash = '${hash}'`);
await sphinx.insertValues('files', {
id: newId++,
hash,
path: filesMap[hash][0].path,
pathIndex: filesMap[hash][0].path,
size_new: filesMap[hash][0].size.toString()
});
logT('patcher', 'patched file', fileIndex, 'of', count, 'hash', hash, 'cIndex', ++hashCount);
if(patchWindow)
patchWindow.webContents.send('reindex', {field: hash, index: fileIndex, all: count, longTime: true, canBreak: true})
delete filesMap[hash];
}
}
if(!isSizeAlreadyPatched)
{
await forBigTable(sphinx, 'files', (file) => {
if(!filesMap[file.hash])
{
filesMap[file.hash] = []
}
filesMap[file.hash].push(file);
}, null, 1000, 'and size > 0', async (lastTorrent) => {
if(fileIndex > 0 && fileIndex - fileIndexChecked > 500000) {
fileIndexChecked = fileIndex;
logT('patcher', 'perform optimization');
sphinx.query(`OPTIMIZE INDEX files`)
await sphinxApp.waitOptimized('files')
}
let keys = Object.keys(filesMap);
if(keys.length > 2000) {
await fileMapWorker(keys.filter(key => key !== lastTorrent.hash));
}
})
let keys = Object.keys(filesMap);
if(keys.length > 0)
await fileMapWorker(keys);
filesMap = null;
}
await sphinx.query("alter table files drop column `size`");
await sphinx.query("alter table files add column `size` string");
fileIndex = 1;
count = (await sphinx.query("select count(*) as cnt from files where size is null"))[0].cnt;
logT('patcher', 'restoring files', count);
await forBigTable(sphinx, 'files', async (file) => {
if(!file.size_new)
return
file.size = file.size_new.toString();
delete file.size_new;
await sphinx.replaceValues('files', file, {particial: false, sphinxIndex: {pathIndex: 'path'}});
if(patchWindow)
patchWindow.webContents.send('reindex', {field: file.id, index: fileIndex, all: count, longTime: false, canBreak: true})
logT('patcher', 'restored patched file', fileIndex++, 'of', count, 'hash', file.hash);
}, null, 1000, 'and size is null');
await sphinx.query("alter table files drop column `size_new`");
await setVersion(7)
sphinx.query(`OPTIMIZE INDEX files`)
await sphinxApp.waitOptimized('files')
}
}
logT('patcher', 'db patch done')
sphinx.destroy()
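
A note on the v6 case above: the patch is resumable. Merged rows are re-inserted with fresh ids below the first unpatched row, so after an interrupted run the next free id can be recomputed from the database itself. A sketch of that computation, using the same queries as the patch (error handling omitted; the patch wraps this in try/catch):

// Recompute the next insert id after a restart (sketch).
const findNextId = async (sphinx) => {
    // first row still in the old per-file layout (old rows have size > 0)
    const firstUnpatched = (await sphinx.query(
        'select min(id) as cnt from files where size > 0'))[0].cnt
    // highest id already written by a previous, interrupted run
    const lastPatched = (await sphinx.query(
        `select max(id) as cnt from files where id < ${firstUnpatched}`))[0].cnt | 0
    return lastPatched > 0 ? lastPatched + 1 : 1
}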


@@ -1,24 +1,24 @@
module.exports = (sphinx, table, callback, doneCallback, max = 1000, where = '') => new Promise((done) => {
const checker = (index = 0) => {
sphinx.query(`SELECT * FROM ${table} WHERE id > ${index} ${where} LIMIT ${max}`, (err, torrents) => {
const finish = () => {
if(err)
logTE('sql', 'big table parse error', err)
if(doneCallback)
doneCallback(true)
done(true)
}
module.exports = (sphinx, table, callback, doneCallback, max = 1000, where = '', intermediateCallback = null) => new Promise((done) => {
const checker = async (index = 0) => {
const finish = () => {
if(doneCallback)
doneCallback(true)
done(true)
}
const data = await sphinx.query(`SELECT * FROM ${table} WHERE id > ${index} ${where} LIMIT ${max}`);
if(data.length == 0) {
finish()
return;
}
if(!err && torrents.length > 0)
Promise.all(torrents.map(callback)).then(() => {
if(torrents.length === max)
checker(torrents[torrents.length - 1].id)
else
finish()
})
else
finish()
});
await Promise.all(data.map(callback));
if(intermediateCallback)
await intermediateCallback(data[data.length - 1]);
if(data.length === max)
checker(data[data.length - 1].id)
else
finish()
}
checker()
})
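
forBigTable keeps its promise-based paging but is now async/await throughout and gains an optional intermediateCallback that fires after every page; the db patcher uses it to flush merged rows and re-optimize the index periodically. A usage sketch (processFile is hypothetical):

const forBigTable = require('./forBigTable')

const processAllFiles = async (sphinx) => {
    await forBigTable(sphinx, 'files', async (file) => {
        await processFile(file) // per-row callback (hypothetical)
    }, null, 1000, 'and size > 0', async (lastRow) => {
        // fires after each 1000-row page; lastRow is the page's final row
        logT('example', 'page processed, last id', lastRow.id)
    })
}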


@@ -1,8 +1,11 @@
const parseTorrentFiles = require('./parsetTorrentFiles')
module.exports = async (sphinx, hash, torrent) => {
torrent = (torrent && [torrent]) || await sphinx.query(`SELECT * FROM torrents WHERE hash = '${hash}'`)
if(torrent && torrent.length > 0)
{
torrent[0].filesList = (await sphinx.query(`SELECT * FROM files WHERE hash = '${torrent[0].hash}'`)) || []
torrent[0].filesList = parseTorrentFiles(torrent[0].filesList)
return torrent[0]
}
}


@@ -0,0 +1,12 @@
module.exports = (filesData) => {
if(Array.isArray(filesData))
filesData = filesData[0]
let path = filesData.path.split('\n');
let size = filesData.size.split('\n');
return path.map((pathString, index) => Object.assign({}, filesData, {
path: pathString,
size: parseInt(size[index])
}))
}
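
Usage of the new helper, with assumed row contents:

const parseTorrentFiles = require('./parsetTorrentFiles')

// One merged row expands back into a per-file list.
const row = {hash: 'abc', path: 'a.txt\nb/c.mkv', size: '10\n20'}
parseTorrentFiles(row)
// => [{hash: 'abc', path: 'a.txt', size: 10},
//     {hash: 'abc', path: 'b/c.mkv', size: 20}]

Note that Object.assign copies the remaining row fields (hash, id, ...) onto every expanded file.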


@@ -67,7 +67,7 @@ const writeSphinxConfig = async (path, dbPath) => {
rt_attr_string = path
rt_field = pathIndex
rt_attr_string = hash
rt_attr_bigint = size
rt_attr_string = size
}
index version


@@ -515,12 +515,13 @@ module.exports = function (send, recive, dataDirectory, version, env)
torrent.id = torrentsId++;
const recheckFiles = (callback) => {
sphinxSingle.query('SELECT count(*) as files_count FROM files WHERE hash = ?', [torrent.hash], function(err, rows) {
if(!rows)
sphinxSingle.query('SELECT id FROM files WHERE hash = ? limit 1', [torrent.hash], function(err, filesRecords) {
if(err) {
logTE('add', 'cannot check files in recheckFiles')
return
}
const db_files = rows[0]['files_count'];
if(db_files !== torrent.files)
if(!filesRecords || filesRecords.length == 0)
{
callback()
}
@@ -534,12 +535,23 @@ module.exports = function (send, recive, dataDirectory, version, env)
return;
}
filesList.forEach((file) => {
file.id = filesId++;
file.pathIndex = file.path;
});
let path = '';
let size = '';
for(const file of filesList)
{
path += file.path + '\n';
size += file.size + '\n';
}
path = path.slice(0, -1);
size = size.slice(0, -1);
sphinxSingle.insertValues('files', filesList, function(err, result) {
sphinxSingle.insertValues('files', {
id: torrent.id,
hash: torrent.hash,
path,
pathIndex: path,
size
}, function(err, result) {
if(!result) {
console.error(err);
return
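
The concatenation loop above trims the trailing separators with slice(0, -1); an equivalent, slightly shorter sketch of the same packing uses map/join, which needs no trimming:

const path = filesList.map(file => file.path).join('\n')
const size = filesList.map(file => file.size).join('\n')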


@@ -17,11 +17,11 @@ describe("sphinx", () => {
})
it("insert", function(done) {
sphinx.query("INSERT INTO files(id, hash, path, pathIndex, size) VALUES(50001, 'a', 'bashaa', 'bashaa', 50)", (err) => {
sphinx.query("INSERT INTO files(id, hash, path, pathIndex, size) VALUES(50001, 'a', 'bashaa', 'bashaa', '50')", (err) => {
if(err)
throw new Error(err)
sphinx.query("INSERT INTO files(id, hash, path, pathIndex, size) VALUES(50002, 'b', 'biotu', 'biotu', 30)", (err) => {
sphinx.query("INSERT INTO files(id, hash, path, pathIndex, size) VALUES(50002, 'b', 'biotu', 'biotu', '30')", (err) => {
if(err)
throw new Error(err)
@@ -36,8 +36,8 @@ describe("sphinx", () => {
throw new Error(err)
if(result.length !== 1)
throw new Error('not one result')
if(result[0].size !== 50)
if(result[0].size != 50)
throw new Error('not 50 in field')
done()
@@ -53,7 +53,7 @@ describe("sphinx", () => {
if(result[0].hash !== 'a')
throw new Error('not a in hash')
if(result[0].size !== 50)
if(result[0].size != 50)
throw new Error('not 50 in field')
done()