X-Git-Url: https://scm.cri.minesparis.psl.eu/git/Plinn.git/blobdiff_plain/3c4367d8e03450e9a73e61f4247145d2b6c86a33..959d888c17d1403d2eeecc19bc4b5e2c8d1debf6:/catalog.py

diff --git a/catalog.py b/catalog.py
deleted file mode 100644
index 7e49083..0000000
--- a/catalog.py
+++ /dev/null
@@ -1,355 +0,0 @@
-# -*- coding: utf-8 -*-
-from App.class_init import InitializeClass
-from AccessControl import ClassSecurityInfo
-from Acquisition import aq_parent   # needed by getScoredResult below
-from Products.CMFCore.interfaces import IIndexableObject
-from Products.CMFCore.CatalogTool import CatalogTool as BaseCatalogTool
-from Products.CMFCore.CatalogTool import IndexableObjectWrapper
-from Products.PageTemplates.PageTemplateFile import PageTemplateFile
-from Products.CMFCore.permissions import ModifyPortalContent, ManagePortal
-from zope.component import queryMultiAdapter
-from Products.ZCatalog.Catalog import Catalog
-import transaction
-from solr import *
-
-# imports for Catalog class
-from Products.PluginIndexes.interfaces import ILimitedResultIndex
-from Products.ZCatalog.Lazy import LazyMap, LazyCat, LazyValues
-from BTrees.IIBTree import intersection, IISet
-from BTrees.IIBTree import weightedIntersection
-import warnings
-
-_VOLATILE_SOLR_NAME = '_v_solrConnection'
-
-class SolrTransactionHook :
-    ''' Solr commit coupled to the ZODB commit '''
-    def __init__(self, context, con) :
-        self.context = context
-        self.con = con
-
-    def __call__(self, status) :
-        if status :
-            self.con.commit()
-            self.con.close()
-        else :
-            self.con.close()
-        try :
-            delattr(self.context, _VOLATILE_SOLR_NAME)
-        except AttributeError :
-            pass
-
-class CatalogTool(BaseCatalogTool) :
-    meta_type = 'Plinn Catalog'
-    security = ClassSecurityInfo()
-    manage_options = (BaseCatalogTool.manage_options[:5] +
-                      ({'label' : 'Solr', 'action' : 'manage_solr'},) +
-                      BaseCatalogTool.manage_options[5:])
-    manage_solr = PageTemplateFile('www/manage_solr.pt', globals(), __name__='manage_solr')
-
-    def __init__(self, idxs=[]) :
-        super(CatalogTool, self).__init__()
-        self._catalog = DelegatedCatalog(self)
-        self.solr_url = 'http://localhost:8983/solr'
-        self.delegatedIndexes = ('Title', 'Description', 'SearchableText')
-
-    security.declarePublic('getDelegatedIndexes')
-    def getDelegatedIndexes(self) :
-        """ return the names of the indexes delegated to Solr """
-        return self.delegatedIndexes
-
-    security.declareProtected(ManagePortal, 'setSolrProperties')
-    def setSolrProperties(self, url, indexes, REQUEST=None) :
-        """ set Solr server url and delegated indexes """
-        self.solr_url = url
-        self.delegatedIndexes = tuple([i.strip() for i in indexes if i.strip()])
-        if REQUEST :
-            REQUEST.RESPONSE.redirect(self.absolute_url() + '/manage_solr?manage_tabs_message=Saved changes.')
-
-    def _getSolrConnection(self) :
-        if not hasattr(self, _VOLATILE_SOLR_NAME) :
-            con = SolrConnection(self.solr_url)
-            setattr(self, _VOLATILE_SOLR_NAME, con)
-            txn = transaction.get()
-            txn.addAfterCommitHook(SolrTransactionHook(self, con))
-        return getattr(self, _VOLATILE_SOLR_NAME)
-
-    security.declarePrivate('solrAdd')
-    def solrAdd(self, w, uid, idxs) :
-        idxs = idxs if idxs else self.delegatedIndexes
-        # keep only the indexes that are delegated to Solr
-        idxs = [i for i in idxs if i in self.delegatedIndexes]
-        data = {'id' : uid}
-        for name in idxs :
-            attr = getattr(w, name, '')
-            data[name] = attr() if callable(attr) else attr
-        c = self._getSolrConnection()
-        c.add(**data)
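
The Solr coupling above rests on the after-commit hooks of the transaction package: once the ZODB transaction finishes, every registered hook is called with the commit status (True on success, False on abort) as its first argument, followed by the extra arguments given at registration. A minimal standalone sketch of that pattern; FakeSolrConnection and solr_hook are hypothetical stand-ins, not part of this module:

    import transaction

    class FakeSolrConnection(object):
        # Hypothetical stand-in for solr.SolrConnection, only here to make
        # the sketch runnable without a Solr server.
        def commit(self):
            print('solr commit')
        def close(self):
            print('solr close')

    def solr_hook(status, con):
        # The transaction machinery passes the commit status first,
        # then the args registered below.
        if status:
            con.commit()
        con.close()

    con = FakeSolrConnection()
    transaction.get().addAfterCommitHook(solr_hook, args=(con,))
    transaction.commit()  # prints 'solr commit' then 'solr close'

Storing the connection in a _v_ (volatile) attribute keeps it out of the ZODB while letting every indexing call inside the same transaction reuse it.
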
-
-    # PortalCatalog api overloads
-    def catalog_object(self, obj, uid=None, idxs=None, update_metadata=1,
-                       pghandler=None):
-        # Wrap the object with workflow and accessibility
-        # information just before cataloging.
-        if IIndexableObject.providedBy(obj):
-            w = obj
-        else:
-            w = queryMultiAdapter((obj, self), IIndexableObject)
-            if w is None:
-                # BBB
-                w = IndexableObjectWrapper(obj, self)
-
-        idxs_ = idxs
-        if idxs:
-            # Filter out invalid indexes.
-            valid_indexes = self._catalog.indexes.keys()
-            idxs_ = [i for i in idxs if i in valid_indexes]
-
-        super(CatalogTool, self).catalog_object(w, uid, idxs_, update_metadata, pghandler)
-        self.solrAdd(w, uid, idxs)
-
-    security.declarePrivate('reindexObject')
-    def reindexObject(self, object, idxs=[], update_metadata=1, uid=None):
-        """Update catalog after object data has changed.
-
-        The optional idxs argument is a list of specific indexes
-        to update (all of them by default).
-
-        The update_metadata flag controls whether the object's
-        metadata record is updated as well.
-
-        If a non-None uid is passed, it will be used as the catalog uid
-        for the object instead of its physical path.
-        """
-        if uid is None:
-            uid = self.__url(object)
-
-        self.catalog_object(object, uid, idxs, update_metadata)
-
-    security.declarePrivate('unindexObject')
-    def unindexObject(self, object):
-        """Remove from catalog.
-        """
-        super(CatalogTool, self).unindexObject(object)
-        c = self._getSolrConnection()
-        url = self.__url(object)
-        c.delete(id=url)
-
-InitializeClass(CatalogTool)
-
-
-class DelegatedCatalog(Catalog) :
-    ''' This is where searches are actually delegated to Solr. '''
-
-    def __init__(self, zcat, brains=None) :
-        Catalog.__init__(self, brains=brains)
-        self.zcat = zcat
-
-    def delegateSearch(self, query, plan) :
-        '''
-        Falsy return values:
-        None means no delegation took place; the remaining indexes
-        must still be queried.
-        An empty IISet() means the delegated search matched nothing,
-        so the whole search can stop.
-        '''
-        indexes = set(query.keys()).intersection(set(self.zcat.delegatedIndexes))
-        if not indexes :
-            return None
-        delegatedQuery = {}
-        for i in indexes :
-            delegatedQuery[i] = query.pop(i)
-            try : plan.remove(i)
-            except ValueError : pass
-        c = SolrConnection(self.zcat.solr_url)
-        q = ' AND '.join(['%s:"%s"' % item for item in delegatedQuery.items()])
-        resp = c.query(q, fields='id', rows=len(self))
-        c.close()
-        return IISet(filter(None, [self.uids.get(r['id']) for r in resp.results]))
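
The delegated part of the query is flattened into a single Lucene query string by plain string interpolation. A rough sketch of what delegateSearch sends, using the default field names from __init__ (clause order follows dict iteration order):

    query = {'Title': 'report', 'SearchableText': 'solar energy', 'portal_type': 'Document'}
    delegated_fields = ('Title', 'Description', 'SearchableText')
    delegated = dict((k, query.pop(k)) for k in list(query) if k in delegated_fields)
    q = ' AND '.join(['%s:"%s"' % item for item in delegated.items()])
    print(q)      # e.g. Title:"report" AND SearchableText:"solar energy"
    print(query)  # {'portal_type': 'Document'} stays behind for the local indexes

Note that the values are not escaped, so a search term containing a double quote would produce an invalid Solr query.
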
-
-    def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
-        """Iterate through the indexes, applying the query to each one. If
-        merge is true then return a lazy result set (sorted if appropriate)
-        otherwise return the raw (possibly scored) results for later merging.
-        Limit is used in conjunction with sorting or scored results to inform
-        the catalog how many results you are really interested in. The catalog
-        can then use optimizations to save time and memory. The number of
-        results is not guaranteed to fall within the limit, however; you
-        should still slice or batch the results as usual."""
-
-        rs = None  # resultset
-
-        # Indexes fulfill a fairly large contract here. We hand each
-        # index the query mapping we are given (which may be composed
-        # of some combination of web request, kw mappings or plain old dicts)
-        # and the index decides what to do with it. If the index finds work
-        # for itself in the query, it returns the results and a tuple of
-        # the attributes that were used. If the index finds nothing for it
-        # to do then it returns None.
-
-        # Canonicalize the request into a sensible query before passing it on
-        query = self.make_query(query)
-
-        cr = self.getCatalogPlan(query)
-        cr.start()
-
-        plan = cr.plan()
-        if not plan:
-            plan = self._sorted_search_indexes(query)
-
-        # delegation
-        rs = self.delegateSearch(query, plan)
-        if rs is not None and not rs :
-            return LazyCat([])
-
-        indexes = self.indexes.keys()
-        for i in plan:
-            if i not in indexes:
-                # We can have bogus keys or the plan can contain index names
-                # that have been removed in the meantime
-                continue
-
-            index = self.getIndex(i)
-            _apply_index = getattr(index, "_apply_index", None)
-            if _apply_index is None:
-                continue
-
-            cr.start_split(i)
-            limit_result = ILimitedResultIndex.providedBy(index)
-            if limit_result:
-                r = _apply_index(query, rs)
-            else:
-                r = _apply_index(query)
-
-            if r is not None:
-                r, u = r
-                # Short circuit if empty result
-                # BBB: We can remove the "r is not None" check in Zope 2.14
-                # once we don't need to support the "return everything" case
-                # anymore
-                if r is not None and not r:
-                    cr.stop_split(i, result=None, limit=limit_result)
-                    return LazyCat([])
-
-                # provide detailed info about the pure intersection time
-                intersect_id = i + '#intersection'
-                cr.start_split(intersect_id)
-                # weightedIntersection preserves the values from any mappings
-                # we get, as some indexes don't return simple sets
-                if hasattr(rs, 'items') or hasattr(r, 'items'):
-                    _, rs = weightedIntersection(rs, r)
-                else:
-                    rs = intersection(rs, r)
-
-                cr.stop_split(intersect_id)
-
-                # consider the time it takes to intersect the index result
-                # with the total resultset to be part of the index time
-                cr.stop_split(i, result=r, limit=limit_result)
-                if not rs:
-                    break
-            else:
-                cr.stop_split(i, result=None, limit=limit_result)
-
-        # Try to deduce the sort limit from batching arguments
-        b_start = int(query.get('b_start', 0))
-        b_size = query.get('b_size', None)
-        if b_size is not None:
-            b_size = int(b_size)
-
-        if b_size is not None:
-            limit = b_start + b_size
-        elif limit and b_size is None:
-            b_size = limit
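
The batching arithmetic above is the whole trick: when the caller paginates, the catalog only ever needs the first b_start + b_size records, so that sum can be handed to the sorting code as a limit. The same logic in isolation (deduce_limit is a hypothetical helper, not part of the module):

    def deduce_limit(query, limit=None):
        # Everything up to the end of the requested batch is needed.
        b_start = int(query.get('b_start', 0))
        b_size = query.get('b_size', None)
        if b_size is not None:
            limit = b_start + int(b_size)
        elif limit:
            b_size = limit
        return b_start, b_size, limit

    print(deduce_limit({'b_start': 20, 'b_size': 10}))  # (20, 10, 30)
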
-
-        if rs is None:
-            # None of the indexes found anything to do with the query.
-            # We take this to mean that the query was empty (an empty filter)
-            # and so we return everything in the catalog.
-            warnings.warn('Your query %s produced no query restriction. '
-                          'Currently the entire catalog content is returned. '
-                          'In Zope 2.14 this will result in an empty LazyCat '
-                          'to be returned.' % repr(cr.make_key(query)),
-                          DeprecationWarning, stacklevel=3)
-
-            rlen = len(self)
-            if sort_index is None:
-                sequence, slen = self._limit_sequence(self.data.items(), rlen,
-                                                      b_start, b_size)
-                result = LazyMap(self.instantiate, sequence, slen,
-                                 actual_result_count=rlen)
-            else:
-                cr.start_split('sort_on')
-                result = self.sortResults(
-                    self.data, sort_index, reverse, limit, merge,
-                    actual_result_count=rlen, b_start=b_start,
-                    b_size=b_size)
-                cr.stop_split('sort_on', None)
-        elif rs:
-            # We got some results from the indexes.
-            # Sort and convert to sequences.
-            # XXX: The check for 'values' is really stupid since we call
-            # items() and *not* values()
-            rlen = len(rs)
-            if sort_index is None and hasattr(rs, 'items'):
-                # Having an 'items' means we have a data structure with
-                # scores. Build a new result set, sort it by score, reverse
-                # it, compute the normalized score, and Lazify it.
-
-                if not merge:
-                    # Don't bother to sort here, return a list of
-                    # three tuples to be passed later to mergeResults.
-                    # Note that data_record_normalized_score_ cannot be
-                    # calculated and will always be 1 in this case.
-                    getitem = self.__getitem__
-                    result = [(score, (1, score, rid), getitem)
-                              for rid, score in rs.items()]
-                else:
-                    cr.start_split('sort_on')
-
-                    rs = rs.byValue(0)  # sort it by score
-                    max = float(rs[0][0])
-
-                    # Here we define our getter function inline so that
-                    # we can conveniently store the max value as a default arg
-                    # and make the normalized score computation lazy
-                    def getScoredResult(item, max=max, self=self):
-                        """
-                        Returns instances of self._v_brains, or whatever is
-                        passed into self.useBrains.
-                        """
-                        score, key = item
-                        r = self._v_result_class(self.data[key])\
-                            .__of__(aq_parent(self))
-                        r.data_record_id_ = key
-                        r.data_record_score_ = score
-                        r.data_record_normalized_score_ = int(100. * score / max)
-                        return r
-
-                    sequence, slen = self._limit_sequence(rs, rlen, b_start,
-                                                          b_size)
-                    result = LazyMap(getScoredResult, sequence, slen,
-                                     actual_result_count=rlen)
-                    cr.stop_split('sort_on', None)
-
-            elif sort_index is None and not hasattr(rs, 'values'):
-                # no scores
-                if hasattr(rs, 'keys'):
-                    rs = rs.keys()
-                sequence, slen = self._limit_sequence(rs, rlen, b_start,
-                                                      b_size)
-                result = LazyMap(self.__getitem__, sequence, slen,
-                                 actual_result_count=rlen)
-            else:
-                # Sort. If there are scores, then this block is not
-                # reached, therefore 'sort-on' does not happen in the
-                # context of a text index query. This should probably
-                # sort by relevance first, then the 'sort-on' attribute.
-                cr.start_split('sort_on')
-                result = self.sortResults(rs, sort_index, reverse, limit,
-                                          merge, actual_result_count=rlen,
-                                          b_start=b_start, b_size=b_size)
-                cr.stop_split('sort_on', None)
-        else:
-            # Empty result set
-            result = LazyCat([])
-
-        cr.stop()
-        return result
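
For the scored branch, each brain's data_record_normalized_score_ is the raw score rescaled against the best hit, so the top result is always 100. The arithmetic in isolation, with made-up scores (rs is shaped like the output of byValue(0): (score, rid) pairs, best first):

    rs = [(12.0, 101), (6.0, 102), (3.0, 103)]
    best = float(rs[0][0])
    normalized = [(rid, int(100. * score / best)) for score, rid in rs]
    print(normalized)  # [(101, 100), (102, 50), (103, 25)]
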