diff options
author | Egil Moeller <egil.moller@freecode.no> | 2010-04-11 02:25:39 +0200 |
---|---|---|
committer | Egil Moeller <egil.moller@freecode.no> | 2010-04-11 02:25:39 +0200 |
commit | 7cb7e6de9040e6f0d21390fede044200a0f1d198 (patch) | |
tree | d9f32fd198305ecd23d4afce08000489edd0187d | |
parent | 903fc968581ca7309a860d9336762d2147342754 (diff) | |
download | etherpad-7cb7e6de9040e6f0d21390fede044200a0f1d198.tar.gz etherpad-7cb7e6de9040e6f0d21390fede044200a0f1d198.tar.xz etherpad-7cb7e6de9040e6f0d21390fede044200a0f1d198.zip |
Added a URL indexer. It currently only greps out URLs from pads and stores them in a separate, searchable table; it doesn't yet provide a way to use this info.
Diffstat (limited to '')
-rw-r--r-- | etherpad/src/plugins/urlIndexer/hooks.js | 39 | ||||
-rw-r--r-- | etherpad/src/plugins/urlIndexer/main.js | 32 |
2 files changed, 71 insertions, 0 deletions
// File: etherpad/src/plugins/urlIndexer/hooks.js (new file, mode 100644)

import("etherpad.log");
import("dispatch.{Dispatcher,PrefixMatcher,forward}");
import("sqlbase.sqlobj");

// Matches a single "word" character: ASCII digits and letters plus a set of
// non-ASCII letter/digit ranges.
REGEX_WORDCHAR = /[\u0030-\u0039\u0041-\u005A\u0061-\u007A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u1FFF\u3040-\u9FFF\uF900-\uFDFF\uFE70-\uFEFE\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFDC]/;
// Matches a single character allowed inside a URL: URL punctuation or a word character.
REGEX_URLCHAR = new RegExp('('+/[-:@a-zA-Z0-9_.,~%+\/\\?=&#;()$]/.source+'|'+REGEX_WORDCHAR.source+')');
// Matches a whole URL: a known scheme prefix followed by URL characters, where
// the final character may not be one of ':', '.', ',' or ';' (so trailing
// sentence punctuation is not swallowed). Global, so match() returns all hits.
REGEX_URL = new RegExp(/(?:(?:https?|s?ftp|ftps|file|smb|afp|nfs|(x-)?man|gopher|txmt):\/\/|mailto:)/.source+REGEX_URLCHAR.source+'*(?![:.,;])'+REGEX_URLCHAR.source, 'g');

/**
 * Hook handler for 'padModelWriteToDB': re-indexes the URLs found in a pad.
 *
 * Extracts every URL from the pad text and compares the space-joined list with
 * the copy cached in PAD_URL_CACHE. Only when the two differ are the cache row
 * and the per-URL rows in PAD_URL rewritten.
 *
 * @param args Hook arguments. Reads args.pad.text() for the pad content and
 *             args.padId as the key for both tables.
 */
function padModelWriteToDB(args) {
  /* Update the URL index for the pad */

  // With the 'g' flag match() yields every full match, or null when there is none.
  var new_urls = args.pad.text().match(REGEX_URL) || [];
  var new_urls_str = new_urls.join(' ');

  var old_urls_row = sqlobj.selectSingle("PAD_URL_CACHE", { PAD_ID: args.padId });
  // Truthiness check (rather than !== null) so that a missing row is handled
  // the same way here and in the update/insert decision below.
  var old_urls_str = old_urls_row ? old_urls_row['URLS'] : '';

  // NOTE(review): loose (in)equality is kept from the original on purpose;
  // confirm what type sqlobj returns for URLS before tightening to !==.
  var old_urls = old_urls_str != '' ? old_urls_str.split(' ') : [];

  if (new_urls_str != old_urls_str) {
    log.info({message: 'Updating urls', new_urls:new_urls, old_urls:old_urls});

    if (old_urls_row) {
      sqlobj.update("PAD_URL_CACHE", {PAD_ID: args.padId }, {URLS: new_urls_str});
    } else {
      sqlobj.insert("PAD_URL_CACHE", {PAD_ID: args.padId, URLS: new_urls_str});
    }

    // Replace the per-URL rows wholesale: drop the old set, insert the new one.
    sqlobj.deleteRows("PAD_URL", {PAD_ID: args.padId});

    // 'var' keeps the counter local; the original leaked it as an implicit global.
    for (var i = 0; i < new_urls.length; i++) {
      sqlobj.insert("PAD_URL", {PAD_ID: args.padId, URL: new_urls[i]});
    }
  }
}
// File: etherpad/src/plugins/urlIndexer/main.js (new file, mode 100644)

import("etherpad.log");
import("plugins.urlIndexer.hooks");
import("sqlbase.sqlobj");
import("sqlbase.sqlcommon");

/**
 * Plugin initializer: populates `this` with the plugin's hook list, its
 * description, the hook handler, and the install/uninstall callbacks.
 */
function init() {
  this.hooks = ['padModelWriteToDB'];
  this.description = 'Indexes URLs linked to in pads so that they can be displayed outside pads, searched for etc.';
  // `hooks` here is the imported plugins.urlIndexer.hooks module, not this.hooks.
  this.padModelWriteToDB = hooks.padModelWriteToDB;

  this.install = install;
  this.uninstall = uninstall;
}

/**
 * Install callback: logs the installation and creates the two tables used by
 * the indexer, PAD_URL (one row per pad/URL pair) and PAD_URL_CACHE (one row
 * per pad holding the joined URL list).
 */
function install() {
  log.info("Installing urlIndexer");

  var padUrlColumns = {
    PAD_ID: 'varchar(128) character set utf8 collate utf8_bin not null references PAD_META(ID)',
    URL: 'varchar(1024) character set utf8 collate utf8_bin not null'
  };
  sqlobj.createTable('PAD_URL', padUrlColumns);

  var padUrlCacheColumns = {
    PAD_ID: 'varchar(128) character set utf8 collate utf8_bin unique not null references PAD_META(ID)',
    URLS: 'text collate utf8_bin not null'
  };
  sqlobj.createTable('PAD_URL_CACHE', padUrlCacheColumns);
}

/**
 * Uninstall callback: only logs the event; the tables created by install()
 * are left in place.
 */
function uninstall() {
  log.info("Uninstalling urlIndexer");
}