1 # -*- coding: utf-8 -*-
2 from App
.class_init
import InitializeClass
3 from AccessControl
import ClassSecurityInfo
4 from Products
.CMFCore
.interfaces
import IIndexableObject
5 from Products
.CMFCore
.CatalogTool
import CatalogTool
as BaseCatalogTool
6 from Products
.CMFCore
.CatalogTool
import IndexableObjectWrapper
7 from Products
.PageTemplates
.PageTemplateFile
import PageTemplateFile
8 from Products
.CMFCore
.permissions
import ModifyPortalContent
9 from zope
.component
import queryMultiAdapter
10 from Products
.ZCatalog
.Catalog
import Catalog
14 # imports for Catalog class
15 from Products
.PluginIndexes
.interfaces
import ILimitedResultIndex
16 from Products
.ZCatalog
.Lazy
import LazyMap
, LazyCat
, LazyValues
17 from BTrees
.IIBTree
import intersection
, IISet
18 from BTrees
.IIBTree
import weightedIntersection
class SolrTransactionHook:
    """After-commit hook that couples the Solr commit to the ZODB commit.

    An instance wraps one Solr connection and is registered through
    ``transaction.get().addAfterCommitHook``; the transaction machinery then
    calls the instance with the outcome of the commit.
    """

    def __init__(self, connection):
        """Remember the Solr ``connection`` to finalize after the commit."""
        self.connection = connection

    def __call__(self, status):
        """Commit the pending Solr changes when ``status`` is true, then close.

        ``status`` is the flag handed over by the transaction machinery.
        When it is false nothing is committed and the connection is only
        closed. An exception raised by the Solr commit propagates to the
        caller.
        """
        try:
            if status:
                self.connection.commit()
        finally:
            # Close exactly once on every path, including a failing commit,
            # so the connection is never leaked.
            self.connection.close()
# Portal catalog tool ('Legivoc Catalog') extending the CMFCore CatalogTool
# (imported as BaseCatalogTool) with Solr support for a fixed set of indexes.
# NOTE(review): the ``self.__url`` calls below are name-mangled to
# ``_CatalogTool__url``; they can only reach a base-class helper because the
# base class is itself named ``CatalogTool`` - presumably it is defined
# there; confirm before renaming this class.
33 class CatalogTool(BaseCatalogTool
) :
34 meta_type
= 'Legivoc Catalog'
35 security
= ClassSecurityInfo()
# Insert a 'Solr' management tab after the first five inherited tabs.
36 manage_options
= (BaseCatalogTool
.manage_options
[:5] +
37 ({'label' : 'Solr', 'action' : 'manage_solr'},) +
38 BaseCatalogTool
.manage_options
[5:])
# Page template rendered for the 'Solr' tab.
39 manage_solr
= PageTemplateFile('www/manage_solr', globals())
# Set up the tool: run the base initialiser, install a DelegatedCatalog as
# the internal catalog, and store the Solr URL and the delegated index names.
# NOTE(review): ``idxs`` is not used by any visible line and its default is a
# shared mutable list.
42 def __init__(self
, idxs
=[]) :
43 super(CatalogTool
, self
).__init
__()
44 self
._catalog
= DelegatedCatalog(self
)
45 self
.solr_url
= 'http://localhost:8983/solr'
46 self
.delegatedIndexes
= ('Title', 'Description', 'SearchableText')
# Collect the indexable values of ``object`` for Solr.
#   object -- content object; tested against IIndexableObject, and wrapped
#             through the IIndexableObject multi-adapter or
#             IndexableObjectWrapper (the branching is only partly visible
#             in this view).
#   idxs   -- attribute names to read from the wrapper; an empty list means
#             ``self.delegatedIndexes``.
#   uid    -- document identifier; a falsy value means ``self.__url(object)``.
# Each attribute is read with '' as the default and is called when callable.
# A SolrConnection to ``self.solr_url`` is opened and handed to a
# SolrTransactionHook registered as an after-commit hook on the current
# transaction.
# NOTE(review): the statement that pushes ``data`` to Solr is not visible in
# this view - confirm. The ``idxs=[]`` default is a shared mutable list (the
# visible code does not mutate it).
48 security
.declarePrivate('solrAdd')
49 def solrAdd(self
, object, idxs
=[], uid
=None) :
50 if IIndexableObject
.providedBy(object):
53 w
= queryMultiAdapter( (object, self
), IIndexableObject
)
56 w
= IndexableObjectWrapper(object, self
)
58 uid
= uid
if uid
else self
.__url
(object)
59 idxs
= idxs
if idxs
!=[] else self
.delegatedIndexes
62 attr
= getattr(w
, name
, '')
63 data
[name
] = attr() if callable(attr
) else attr
64 c
= SolrConnection(self
.solr_url
)
66 txn
= transaction
.get()
67 txn
.addAfterCommitHook(SolrTransactionHook(c
))
70 # PortalCatalog api overloads
# indexObject catalogs ``object`` through the base class.
# NOTE(review): its docstring also promises a Solr update, but no such call
# is visible in this view - confirm.
71 security
.declareProtected(ModifyPortalContent
, 'indexObject')
72 def indexObject(self
, object) :
73 """ Add to catalog and send to Solr """
74 super(CatalogTool
, self
).indexObject(object)
# reindexObject reindexes ``object`` in the base catalog, then resends it to
# Solr through solrAdd. The requested ``idxs`` are narrowed to names that are
# both existing catalog indexes and delegated indexes;
# ``self.delegatedIndexes`` is the fallback (the condition selecting it is
# not visible in this view).
# NOTE(review): declared private here, whereas indexObject is protected by
# ModifyPortalContent - confirm this difference is intended.
77 security
.declarePrivate('reindexObject')
78 def reindexObject(self
, object, idxs
=[], update_metadata
=1, uid
=None):
79 super(CatalogTool
, self
).reindexObject(object,
81 update_metadata
=update_metadata
,
84 # Filter out invalid indexes.
85 valid_indexes
= self
._catalog
.indexes
.keys()
86 idxs
= [i
for i
in idxs
if i
in valid_indexes
and i
in self
.delegatedIndexes
]
88 idxs
= self
.delegatedIndexes
91 self
.solrAdd(object, idxs
=idxs
, uid
=uid
)
# unindexObject removes ``object`` from the base catalog, then opens a
# SolrConnection and registers a SolrTransactionHook for it as an
# after-commit hook on the current transaction.
# NOTE(review): ``url`` is computed but the Solr delete that presumably uses
# it is not visible in this view - confirm.
93 security
.declarePrivate('unindexObject')
94 def unindexObject(self
, object):
95 """Remove from catalog.
97 super(CatalogTool
, self
).unindexObject(object)
98 c
= SolrConnection(self
.solr_url
)
99 url
= self
.__url
(object)
101 txn
= transaction
.get()
102 txn
.addAfterCommitHook(SolrTransactionHook(c
))
104 InitializeClass(CatalogTool
)
107 class DelegatedCatalog(Catalog
) :
108 '''C'est ici qu'on délègue effectivement à Solr '''
# Initialise the underlying ZCatalog ``Catalog``, forwarding ``brains``.
# NOTE(review): ``zcat`` is accepted but no visible line stores it, while
# getDelegatedIndexes contains a ``self.zcat`` read - confirm that it is kept
# on the instance.
110 def __init__(self
, zcat
, brains
=None) :
111 Catalog
.__init
__(self
, brains
=brains
)
def getDelegatedIndexes(self):
    """Return the names of the indexes whose queries are delegated to Solr.

    The names are currently a fixed tuple.
    """
    # TODO: return self.zcat.delegatedIndexes instead of this fixed tuple.
    # The original kept that as a second, unreachable ``return``; it is
    # recorded here rather than left as dead code.
    return ('Title', 'Description', 'SearchableText')
# Run the Solr-backed part of ``query`` and return the matching record ids.
#   query -- query mapping; the entries that are delegated are popped out of
#            it, so the caller's mapping is modified.
#   plan  -- iterable of index names; only the names also returned by
#            getDelegatedIndexes() are considered.
# When something is delegated, the popped entries are turned into
# ``name:"value"`` clauses joined with ' AND ', Solr is asked for the 'id'
# field, and each returned id is looked up in ``self.uids``; falsy lookups
# are dropped and the rest is returned as an IISet.
# NOTE(review): the value returned when nothing is delegated is not visible
# in this view - presumably None, as the text below describes; confirm.
# NOTE(review): the Solr URL is hard-coded here; query values are
# interpolated into the Solr query string without escaping; no close of the
# connection is visible - confirm each of these.
118 def delegateSearch(self
, query
, plan
) :
121 None means no delegation: the remaining indexes must still be queried.
122 An empty IISet() means the delegated search found nothing, so the search can stop.
124 indexes
= set(plan
).intersection(set(self
.getDelegatedIndexes()))
127 delegatedQuery
[i
] = query
.pop(i
)
129 if not delegatedQuery
:
131 c
= SolrConnection('http://localhost:8983/solr')
132 q
=' AND '.join(['%s:"%s"' % item
for item
in delegatedQuery
.items()])
133 resp
= c
.query(q
, fields
='id')
134 return IISet(filter(None, [self
.uids
.get(r
['id']) for r
in resp
.results
]))
136 def search(self
, query
, sort_index
=None, reverse
=0, limit
=None, merge
=1):
137 """Iterate through the indexes, applying the query to each one. If
138 merge is true then return a lazy result set (sorted if appropriate)
139 otherwise return the raw (possibly scored) results for later merging.
140 Limit is used in conjuntion with sorting or scored results to inform
141 the catalog how many results you are really interested in. The catalog
142 can then use optimizations to save time and memory. The number of
143 results is not guaranteed to fall within the limit however, you should
144 still slice or batch the results as usual."""
146 rs
= None # resultset
148 # Indexes fulfill a fairly large contract here. We hand each
149 # index the query mapping we are given (which may be composed
150 # of some combination of web request, kw mappings or plain old dicts)
151 # and the index decides what to do with it. If the index finds work
152 # for itself in the query, it returns the results and a tuple of
153 # the attributes that were used. If the index finds nothing for it
154 # to do then it returns None.
156 # Canonicalize the request into a sensible query before passing it on
157 query
= self
.make_query(query
)
159 cr
= self
.getCatalogPlan(query
)
164 plan
= self
._sorted
_search
_indexes
(query
)
167 rs
= self
.delegateSearch(query
, plan
)
168 if rs
is not None and not rs
:
171 indexes
= self
.indexes
.keys()
174 # We can have bogus keys or the plan can contain index names
175 # that have been removed in the meantime
178 index
= self
.getIndex(i
)
179 _apply_index
= getattr(index
, "_apply_index", None)
180 if _apply_index
is None:
184 limit_result
= ILimitedResultIndex
.providedBy(index
)
186 r
= _apply_index(query
, rs
)
188 r
= _apply_index(query
)
192 # Short circuit if empty result
193 # BBB: We can remove the "r is not None" check in Zope 2.14
194 # once we don't need to support the "return everything" case
196 if r
is not None and not r
:
197 cr
.stop_split(i
, result
=None, limit
=limit_result
)
200 # provide detailed info about the pure intersection time
201 intersect_id
= i
+ '#intersection'
202 cr
.start_split(intersect_id
)
203 # weightedIntersection preserves the values from any mappings
204 # we get, as some indexes don't return simple sets
205 if hasattr(rs
, 'items') or hasattr(r
, 'items'):
206 _
, rs
= weightedIntersection(rs
, r
)
208 rs
= intersection(rs
, r
)
210 cr
.stop_split(intersect_id
)
212 # consider the time it takes to intersect the index result with
213 # the total resultset to be part of the index time
214 cr
.stop_split(i
, result
=r
, limit
=limit_result
)
218 cr
.stop_split(i
, result
=None, limit
=limit_result
)
220 # Try to deduce the sort limit from batching arguments
221 b_start
= int(query
.get('b_start', 0))
222 b_size
= query
.get('b_size', None)
223 if b_size
is not None:
226 if b_size
is not None:
227 limit
= b_start
+ b_size
228 elif limit
and b_size
is None:
232 # None of the indexes found anything to do with the query
233 # We take this to mean that the query was empty (an empty filter)
234 # and so we return everything in the catalog
235 warnings
.warn('Your query %s produced no query restriction. '
236 'Currently the entire catalog content is returned. '
237 'In Zope 2.14 this will result in an empty LazyCat '
238 'to be returned.' % repr(cr
.make_key(query
)),
239 DeprecationWarning, stacklevel
=3)
242 if sort_index
is None:
243 sequence
, slen
= self
._limit
_sequence
(self
.data
.items(), rlen
,
245 result
= LazyMap(self
.instantiate
, sequence
, slen
,
246 actual_result_count
=rlen
)
248 cr
.start_split('sort_on')
249 result
= self
.sortResults(
250 self
.data
, sort_index
, reverse
, limit
, merge
,
251 actual_result_count
=rlen
, b_start
=b_start
,
253 cr
.stop_split('sort_on', None)
255 # We got some results from the indexes.
256 # Sort and convert to sequences.
257 # XXX: The check for 'values' is really stupid since we call
258 # items() and *not* values()
260 if sort_index
is None and hasattr(rs
, 'items'):
261 # having a 'items' means we have a data structure with
262 # scores. Build a new result set, sort it by score, reverse
263 # it, compute the normalized score, and Lazify it.
266 # Don't bother to sort here, return a list of
267 # three tuples to be passed later to mergeResults
268 # note that data_record_normalized_score_ cannot be
269 # calculated and will always be 1 in this case
270 getitem
= self
.__getitem
__
271 result
= [(score
, (1, score
, rid
), getitem
)
272 for rid
, score
in rs
.items()]
274 cr
.start_split('sort_on')
276 rs
= rs
.byValue(0) # sort it by score
277 max = float(rs
[0][0])
279 # Here we define our getter function inline so that
280 # we can conveniently store the max value as a default arg
281 # and make the normalized score computation lazy
282 def getScoredResult(item
, max=max, self
=self
):
284 Returns instances of self._v_brains, or whatever is
285 passed into self.useBrains.
288 r
=self
._v
_result
_class
(self
.data
[key
])\
289 .__of
__(aq_parent(self
))
290 r
.data_record_id_
= key
291 r
.data_record_score_
= score
292 r
.data_record_normalized_score_
= int(100. * score
/ max)
295 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
297 result
= LazyMap(getScoredResult
, sequence
, slen
,
298 actual_result_count
=rlen
)
299 cr
.stop_split('sort_on', None)
301 elif sort_index
is None and not hasattr(rs
, 'values'):
303 if hasattr(rs
, 'keys'):
305 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
307 result
= LazyMap(self
.__getitem
__, sequence
, slen
,
308 actual_result_count
=rlen
)
310 # sort. If there are scores, then this block is not
311 # reached, therefore 'sort-on' does not happen in the
312 # context of a text index query. This should probably
313 # sort by relevance first, then the 'sort-on' attribute.
314 cr
.start_split('sort_on')
315 result
= self
.sortResults(rs
, sort_index
, reverse
, limit
,
316 merge
, actual_result_count
=rlen
, b_start
=b_start
,
318 cr
.stop_split('sort_on', None)