1 # -*- coding: utf-8 -*-
2 from App
.class_init
import InitializeClass
3 from AccessControl
import ClassSecurityInfo
4 from Products
.CMFCore
.interfaces
import IIndexableObject
5 from Products
.CMFCore
.CatalogTool
import CatalogTool
as BaseCatalogTool
6 from Products
.CMFCore
.CatalogTool
import IndexableObjectWrapper
7 from Products
.PageTemplates
.PageTemplateFile
import PageTemplateFile
8 from Products
.CMFCore
.permissions
import ModifyPortalContent
, ManagePortal
9 from zope
.component
import queryMultiAdapter
10 from Products
.ZCatalog
.Catalog
import Catalog
14 # imports for Catalog class
15 from Products
.PluginIndexes
.interfaces
import ILimitedResultIndex
16 from Products
.ZCatalog
.Lazy
import LazyMap
, LazyCat
, LazyValues
17 from BTrees
.IIBTree
import intersection
, IISet
18 from BTrees
.IIBTree
import weightedIntersection
# Name of the attribute under which CatalogTool._getSolrConnection() caches
# the Solr connection on the tool; SolrTransactionHook deletes that attribute
# again from its context.  The ``_v_`` prefix follows the ZODB convention for
# volatile attributes, i.e. ones that are not written to the database.
_VOLATILE_SOLR_NAME = '_v_solrConnection'
23 class SolrTransactionHook
:
24 ''' commit solr couplé sur le commit de la ZODB '''
25 def __init__(self
, context
, con
) :
26 self
.context
= context
29 def __call__(self
, status
) :
36 delattr(self
.context
, _VOLATILE_SOLR_NAME
)
37 except AttributeError :
40 class CatalogTool(BaseCatalogTool
) :
41 meta_type
= 'Plinn Catalog'
42 security
= ClassSecurityInfo()
43 manage_options
= (BaseCatalogTool
.manage_options
[:5] +
44 ({'label' : 'Solr', 'action' : 'manage_solr'},) +
45 BaseCatalogTool
.manage_options
[5:])
46 manage_solr
= PageTemplateFile('www/manage_solr', globals(), __name__
='manage_solr')
def __init__(self, idxs=[]):
    """Initialise the tool with a Solr-delegating catalog and defaults.

    Runs the base class initialiser, then installs a ``DelegatedCatalog``
    bound to this tool as ``_catalog`` and sets the default Solr URL and
    the default tuple of delegated index names.

    :param idxs: not used by this method.  The list default is never read
        or mutated here, so sharing it between calls is harmless; it is
        kept as-is so the signature stays unchanged for existing callers.
    """
    super(CatalogTool, self).__init__()
    # Assigned after the base initialiser has run, so this is the catalog
    # object the tool ends up with.
    self._catalog = DelegatedCatalog(self)
    # Defaults; delegatedIndexes can be changed later through
    # setDelegatedIndexes().
    self.solr_url = 'http://localhost:8983/solr'
    self.delegatedIndexes = ('Title', 'Description', 'SearchableText')
56 security
.declarePublic('getDelegatedIndexes')
def getDelegatedIndexes(self):
    """Return the names of the indexes whose queries are delegated to Solr.

    This is the value stored in ``self.delegatedIndexes``: a tuple of
    index names, as assigned by ``__init__`` and ``setDelegatedIndexes``.
    """
    return self.delegatedIndexes
61 security
.declareProtected(ManagePortal
, 'setDelegatedIndexes')
62 def setDelegatedIndexes(self
, indexes
, REQUEST
=None) :
63 """setDelegatedIndexes documentation"""
64 self
.delegatedIndexes
= tuple([i
.strip() for i
in indexes
if i
.strip()])
66 REQUEST
.RESPONSE
.redirect(self
.absolute_url() + '/manage_solr?manage_tabs_message=Saved changes.')
def _getSolrConnection(self):
    """Return the Solr connection cached on this tool, creating it on demand.

    When no connection is stored under the ``_VOLATILE_SOLR_NAME``
    attribute, a ``SolrConnection`` to ``self.solr_url`` is created and
    cached there, and a ``SolrTransactionHook`` for it is registered as an
    after-commit hook on the current transaction.  While the attribute is
    present, later calls return the cached connection and register no
    further hook.

    :returns: the cached or newly created connection object.
    """
    # Single lookup instead of hasattr() followed by getattr().
    con = getattr(self, _VOLATILE_SOLR_NAME, None)
    if con is None:
        con = SolrConnection(self.solr_url)
        setattr(self, _VOLATILE_SOLR_NAME, con)
        # Registered only when a connection is created, so each new
        # connection gets exactly one after-commit hook.
        txn = transaction.get()
        txn.addAfterCommitHook(SolrTransactionHook(self, con))
    return con
76 security
.declarePrivate('solrAdd')
77 def solrAdd(self
, object, idxs
=[], uid
=None) :
78 if IIndexableObject
.providedBy(object):
81 w
= queryMultiAdapter( (object, self
), IIndexableObject
)
84 w
= IndexableObjectWrapper(object, self
)
86 uid
= uid
if uid
else self
.__url
(object)
87 idxs
= idxs
if idxs
!=[] else self
.delegatedIndexes
90 attr
= getattr(w
, name
, '')
91 data
[name
] = attr() if callable(attr
) else attr
92 c
= self
._getSolrConnection
()
96 # PortalCatalog api overloads
97 security
.declareProtected(ModifyPortalContent
, 'indexObject')
98 def indexObject(self
, object) :
99 """ Add to catalog and send to Solr """
100 super(CatalogTool
, self
).indexObject(object)
103 security
.declarePrivate('reindexObject')
104 def reindexObject(self
, object, idxs
=[], update_metadata
=1, uid
=None):
105 super(CatalogTool
, self
).reindexObject(object,
107 update_metadata
=update_metadata
,
110 # Filter out invalid indexes.
111 valid_indexes
= self
._catalog
.indexes
.keys()
112 idxs
= [i
for i
in idxs
if i
in valid_indexes
and i
in self
.delegatedIndexes
]
114 idxs
= self
.delegatedIndexes
117 self
.solrAdd(object, idxs
=idxs
, uid
=uid
)
119 security
.declarePrivate('unindexObject')
120 def unindexObject(self
, object):
121 """Remove from catalog.
123 super(CatalogTool
, self
).unindexObject(object)
124 c
= self
._getSolrConnection
()
125 url
= self
.__url
(object)
# Finalise the class: hands CatalogTool to Zope's InitializeClass, which is
# what puts the ``security`` (ClassSecurityInfo) declarations made in the
# class body into effect.
InitializeClass(CatalogTool)
131 class DelegatedCatalog(Catalog
) :
132 '''C'est ici qu'on délègue effectivement à Solr '''
134 def __init__(self
, zcat
, brains
=None) :
135 Catalog
.__init
__(self
, brains
=brains
)
138 def delegateSearch(self
, query
, plan
) :
141 None signifie : pas de délégation, il faut continuer à interroger les autres index.
142 IISet() vide : pas de résultat lors de la délégation, on peut arrêter la recherche.
144 indexes
= set(query
.keys()).intersection(set(self
.zcat
.delegatedIndexes
))
149 delegatedQuery
[i
] = query
.pop(i
)
151 except ValueError : pass
152 c
= SolrConnection(self
.zcat
.solr_url
)
153 q
=' AND '.join(['%s:"%s"' % item
for item
in delegatedQuery
.items()])
154 resp
= c
.query(q
, fields
='id', rows
=len(self
))
156 return IISet(filter(None, [self
.uids
.get(r
['id']) for r
in resp
.results
]))
158 def search(self
, query
, sort_index
=None, reverse
=0, limit
=None, merge
=1):
159 """Iterate through the indexes, applying the query to each one. If
160 merge is true then return a lazy result set (sorted if appropriate)
161 otherwise return the raw (possibly scored) results for later merging.
162 Limit is used in conjuntion with sorting or scored results to inform
163 the catalog how many results you are really interested in. The catalog
164 can then use optimizations to save time and memory. The number of
165 results is not guaranteed to fall within the limit however, you should
166 still slice or batch the results as usual."""
168 rs
= None # resultset
170 # Indexes fulfill a fairly large contract here. We hand each
171 # index the query mapping we are given (which may be composed
172 # of some combination of web request, kw mappings or plain old dicts)
173 # and the index decides what to do with it. If the index finds work
174 # for itself in the query, it returns the results and a tuple of
175 # the attributes that were used. If the index finds nothing for it
176 # to do then it returns None.
178 # Canonicalize the request into a sensible query before passing it on
179 query
= self
.make_query(query
)
181 cr
= self
.getCatalogPlan(query
)
186 plan
= self
._sorted
_search
_indexes
(query
)
189 rs
= self
.delegateSearch(query
, plan
)
190 if rs
is not None and not rs
:
193 indexes
= self
.indexes
.keys()
196 # We can have bogus keys or the plan can contain index names
197 # that have been removed in the meantime
200 index
= self
.getIndex(i
)
201 _apply_index
= getattr(index
, "_apply_index", None)
202 if _apply_index
is None:
206 limit_result
= ILimitedResultIndex
.providedBy(index
)
208 r
= _apply_index(query
, rs
)
210 r
= _apply_index(query
)
214 # Short circuit if empty result
215 # BBB: We can remove the "r is not None" check in Zope 2.14
216 # once we don't need to support the "return everything" case
218 if r
is not None and not r
:
219 cr
.stop_split(i
, result
=None, limit
=limit_result
)
222 # provide detailed info about the pure intersection time
223 intersect_id
= i
+ '#intersection'
224 cr
.start_split(intersect_id
)
225 # weightedIntersection preserves the values from any mappings
226 # we get, as some indexes don't return simple sets
227 if hasattr(rs
, 'items') or hasattr(r
, 'items'):
228 _
, rs
= weightedIntersection(rs
, r
)
230 rs
= intersection(rs
, r
)
232 cr
.stop_split(intersect_id
)
234 # consider the time it takes to intersect the index result with
235 # the total resultset to be part of the index time
236 cr
.stop_split(i
, result
=r
, limit
=limit_result
)
240 cr
.stop_split(i
, result
=None, limit
=limit_result
)
242 # Try to deduce the sort limit from batching arguments
243 b_start
= int(query
.get('b_start', 0))
244 b_size
= query
.get('b_size', None)
245 if b_size
is not None:
248 if b_size
is not None:
249 limit
= b_start
+ b_size
250 elif limit
and b_size
is None:
254 # None of the indexes found anything to do with the query
255 # We take this to mean that the query was empty (an empty filter)
256 # and so we return everything in the catalog
257 warnings
.warn('Your query %s produced no query restriction. '
258 'Currently the entire catalog content is returned. '
259 'In Zope 2.14 this will result in an empty LazyCat '
260 'to be returned.' % repr(cr
.make_key(query
)),
261 DeprecationWarning, stacklevel
=3)
264 if sort_index
is None:
265 sequence
, slen
= self
._limit
_sequence
(self
.data
.items(), rlen
,
267 result
= LazyMap(self
.instantiate
, sequence
, slen
,
268 actual_result_count
=rlen
)
270 cr
.start_split('sort_on')
271 result
= self
.sortResults(
272 self
.data
, sort_index
, reverse
, limit
, merge
,
273 actual_result_count
=rlen
, b_start
=b_start
,
275 cr
.stop_split('sort_on', None)
277 # We got some results from the indexes.
278 # Sort and convert to sequences.
279 # XXX: The check for 'values' is really stupid since we call
280 # items() and *not* values()
282 if sort_index
is None and hasattr(rs
, 'items'):
283 # having a 'items' means we have a data structure with
284 # scores. Build a new result set, sort it by score, reverse
285 # it, compute the normalized score, and Lazify it.
288 # Don't bother to sort here, return a list of
289 # three tuples to be passed later to mergeResults
290 # note that data_record_normalized_score_ cannot be
291 # calculated and will always be 1 in this case
292 getitem
= self
.__getitem
__
293 result
= [(score
, (1, score
, rid
), getitem
)
294 for rid
, score
in rs
.items()]
296 cr
.start_split('sort_on')
298 rs
= rs
.byValue(0) # sort it by score
299 max = float(rs
[0][0])
301 # Here we define our getter function inline so that
302 # we can conveniently store the max value as a default arg
303 # and make the normalized score computation lazy
304 def getScoredResult(item
, max=max, self
=self
):
306 Returns instances of self._v_brains, or whatever is
307 passed into self.useBrains.
310 r
=self
._v
_result
_class
(self
.data
[key
])\
311 .__of
__(aq_parent(self
))
312 r
.data_record_id_
= key
313 r
.data_record_score_
= score
314 r
.data_record_normalized_score_
= int(100. * score
/ max)
317 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
319 result
= LazyMap(getScoredResult
, sequence
, slen
,
320 actual_result_count
=rlen
)
321 cr
.stop_split('sort_on', None)
323 elif sort_index
is None and not hasattr(rs
, 'values'):
325 if hasattr(rs
, 'keys'):
327 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
329 result
= LazyMap(self
.__getitem
__, sequence
, slen
,
330 actual_result_count
=rlen
)
332 # sort. If there are scores, then this block is not
333 # reached, therefore 'sort-on' does not happen in the
334 # context of a text index query. This should probably
335 # sort by relevance first, then the 'sort-on' attribute.
336 cr
.start_split('sort_on')
337 result
= self
.sortResults(rs
, sort_index
, reverse
, limit
,
338 merge
, actual_result_count
=rlen
, b_start
=b_start
,
340 cr
.stop_split('sort_on', None)