1 # -*- coding: utf-8 -*-
2 from App
.class_init
import InitializeClass
3 from AccessControl
import ClassSecurityInfo
4 from Products
.CMFCore
.interfaces
import IIndexableObject
5 from Products
.CMFCore
.CatalogTool
import CatalogTool
as BaseCatalogTool
6 from Products
.CMFCore
.CatalogTool
import IndexableObjectWrapper
7 from Products
.PageTemplates
.PageTemplateFile
import PageTemplateFile
8 from Products
.CMFCore
.permissions
import ModifyPortalContent
9 from zope
.component
import queryMultiAdapter
10 from Products
.ZCatalog
.Catalog
import Catalog
class SolrTransactionHook:
    """After-commit hook coupling the Solr commit to the ZODB commit.

    An instance is registered via ``transaction.get().addAfterCommitHook(...)``.
    ZODB invokes it after the transaction finishes, passing ``status``:
    True when the ZODB commit succeeded, False when it failed/aborted.
    """

    def __init__(self, connection):
        # Solr connection holding the pending (uncommitted) Solr changes.
        self.connection = connection

    def __call__(self, status):
        """Commit the pending Solr changes iff the ZODB commit succeeded,
        then close the connection in either case.

        NOTE(review): the original source was garbled here (several lines
        lost); the flat commit/close/close sequence visible in the damaged
        text would commit even on abort and close the connection twice.
        The canonical branch on ``status`` is restored below.
        """
        if status:
            self.connection.commit()
        self.connection.close()
class CatalogTool(BaseCatalogTool):
    """portal_catalog replacement that mirrors selected indexes into Solr.

    Normal cataloguing is inherited from the CMFCore CatalogTool; in
    addition, the values of ``delegatedIndexes`` are pushed to a Solr
    server, with the Solr commit deferred to a ZODB after-commit hook
    (see SolrTransactionHook) so both stores stay in step.

    NOTE(review): the original file reached us with many lines missing;
    every reconstructed statement below is flagged inline. Confirm against
    the project's VCS history before relying on the exact Solr calls.
    """

    meta_type = 'Legivoc Catalog'
    security = ClassSecurityInfo()
    # Splice a "Solr" management tab in after the first five standard tabs.
    manage_options = (BaseCatalogTool.manage_options[:5] +
                      ({'label' : 'Solr', 'action' : 'manage_solr'},) +
                      BaseCatalogTool.manage_options[5:])
    manage_solr = PageTemplateFile('www/manage_solr', globals())

    def __init__(self, idxs=[]):
        super(CatalogTool, self).__init__()
        # Replace the stock ZCatalog Catalog with the Solr-delegating one.
        self._catalog = DelegatedCatalog()
        self.solr_url = 'http://localhost:8983/solr'
        # Indexes whose content is mirrored to (and searched in) Solr.
        self.delegatedIndexes = ('Title', 'Description', 'SearchableText')

    security.declarePrivate('solrAdd')
    def solrAdd(self, object, idxs=[], uid=None):
        """Send the delegated index values of *object* to Solr.

        :param object: the content object to index.
        :param idxs: index names to push; defaults to ``delegatedIndexes``.
        :param uid: catalog uid (path); computed from the object if omitted.

        The Solr ``commit`` is NOT issued here — it is deferred to an
        after-commit hook so Solr only commits if the ZODB does.
        """
        # NOTE(review): the original branch lines were lost; this is the
        # canonical CMFCore wrapping pattern — use the object as-is when it
        # is already indexable, otherwise adapt it, falling back to the
        # legacy wrapper.
        if IIndexableObject.providedBy(object):
            w = object
        else:
            w = queryMultiAdapter((object, self), IIndexableObject)
            if w is None:
                w = IndexableObjectWrapper(object, self)
        # self.__url mangles to _CatalogTool__url, which matches the private
        # helper defined on the (identically named) CMFCore base class.
        uid = uid if uid else self.__url(object)
        idxs = idxs if idxs != [] else self.delegatedIndexes
        # NOTE(review): document-building lines were lost; presumably the
        # Solr document is keyed by uid and filled from the wrapped object.
        data = {'id' : uid}
        for name in idxs:
            attr = getattr(w, name, '')
            data[name] = attr() if callable(attr) else attr
        c = SolrConnection(self.solr_url)
        # NOTE(review): the add call itself was lost — reconstructed.
        c.add(**data)
        # Defer commit/close to the transaction boundary.
        txn = transaction.get()
        txn.addAfterCommitHook(SolrTransactionHook(c))

    # PortalCatalog api overloads
    security.declareProtected(ModifyPortalContent, 'indexObject')
    def indexObject(self, object):
        """ Add to catalog and send to Solr """
        super(CatalogTool, self).indexObject(object)
        # NOTE(review): lost line — presumably the Solr push.
        self.solrAdd(object)

    security.declarePrivate('reindexObject')
    def reindexObject(self, object, idxs=[], update_metadata=1, uid=None):
        """Reindex in the ZCatalog, then resend delegated indexes to Solr.

        NOTE(review): the super() call's trailing arguments were lost;
        reconstructed to forward all parameters unchanged.
        """
        super(CatalogTool, self).reindexObject(object,
                                               idxs=idxs,
                                               update_metadata=update_metadata,
                                               uid=uid)
        # Filter out invalid indexes.
        valid_indexes = self._catalog.indexes.keys()
        idxs = [i for i in idxs
                if i in valid_indexes and i in self.delegatedIndexes]
        # NOTE(review): guard line lost — an empty selection falls back to
        # the full delegated set, matching solrAdd's behaviour.
        if not idxs:
            idxs = self.delegatedIndexes
        self.solrAdd(object, idxs=idxs, uid=uid)

    security.declarePrivate('unindexObject')
    def unindexObject(self, object):
        """Remove from catalog and schedule removal from Solr."""
        super(CatalogTool, self).unindexObject(object)
        c = SolrConnection(self.solr_url)
        url = self.__url(object)
        # NOTE(review): the delete call was lost — reconstructed from the
        # solrpy API; the uid/path is the Solr document id (cf. solrAdd).
        c.delete(id=url)
        txn = transaction.get()
        txn.addAfterCommitHook(SolrTransactionHook(c))
# Apply the ClassSecurityInfo declarations made on CatalogTool above.
InitializeClass(CatalogTool)
100 class DelegatedCatalog(Catalog
) :
101 '''C'est ici qu'on délègue effectivement à Solr '''
103 def search(self
, query
, sort_index
=None, reverse
=0, limit
=None, merge
=1):
104 """Iterate through the indexes, applying the query to each one. If
105 merge is true then return a lazy result set (sorted if appropriate)
106 otherwise return the raw (possibly scored) results for later merging.
107 Limit is used in conjuntion with sorting or scored results to inform
108 the catalog how many results you are really interested in. The catalog
109 can then use optimizations to save time and memory. The number of
110 results is not guaranteed to fall within the limit however, you should
111 still slice or batch the results as usual."""
113 rs
= None # resultset
115 # Indexes fulfill a fairly large contract here. We hand each
116 # index the query mapping we are given (which may be composed
117 # of some combination of web request, kw mappings or plain old dicts)
118 # and the index decides what to do with it. If the index finds work
119 # for itself in the query, it returns the results and a tuple of
120 # the attributes that were used. If the index finds nothing for it
121 # to do then it returns None.
123 # Canonicalize the request into a sensible query before passing it on
124 query
= self
.make_query(query
)
126 cr
= self
.getCatalogPlan(query
)
131 plan
= self
._sorted
_search
_indexes
(query
)
133 indexes
= self
.indexes
.keys()
136 # We can have bogus keys or the plan can contain index names
137 # that have been removed in the meantime
140 index
= self
.getIndex(i
)
141 _apply_index
= getattr(index
, "_apply_index", None)
142 if _apply_index
is None:
146 limit_result
= ILimitedResultIndex
.providedBy(index
)
148 r
= _apply_index(query
, rs
)
150 r
= _apply_index(query
)
154 # Short circuit if empty result
155 # BBB: We can remove the "r is not None" check in Zope 2.14
156 # once we don't need to support the "return everything" case
158 if r
is not None and not r
:
159 cr
.stop_split(i
, result
=None, limit
=limit_result
)
162 # provide detailed info about the pure intersection time
163 intersect_id
= i
+ '#intersection'
164 cr
.start_split(intersect_id
)
165 # weightedIntersection preserves the values from any mappings
166 # we get, as some indexes don't return simple sets
167 if hasattr(rs
, 'items') or hasattr(r
, 'items'):
168 _
, rs
= weightedIntersection(rs
, r
)
170 rs
= intersection(rs
, r
)
172 cr
.stop_split(intersect_id
)
174 # consider the time it takes to intersect the index result with
175 # the total resultset to be part of the index time
176 cr
.stop_split(i
, result
=r
, limit
=limit_result
)
180 cr
.stop_split(i
, result
=None, limit
=limit_result
)
182 # Try to deduce the sort limit from batching arguments
183 b_start
= int(query
.get('b_start', 0))
184 b_size
= query
.get('b_size', None)
185 if b_size
is not None:
188 if b_size
is not None:
189 limit
= b_start
+ b_size
190 elif limit
and b_size
is None:
194 # None of the indexes found anything to do with the query
195 # We take this to mean that the query was empty (an empty filter)
196 # and so we return everything in the catalog
197 warnings
.warn('Your query %s produced no query restriction. '
198 'Currently the entire catalog content is returned. '
199 'In Zope 2.14 this will result in an empty LazyCat '
200 'to be returned.' % repr(cr
.make_key(query
)),
201 DeprecationWarning, stacklevel
=3)
204 if sort_index
is None:
205 sequence
, slen
= self
._limit
_sequence
(self
.data
.items(), rlen
,
207 result
= LazyMap(self
.instantiate
, sequence
, slen
,
208 actual_result_count
=rlen
)
210 cr
.start_split('sort_on')
211 result
= self
.sortResults(
212 self
.data
, sort_index
, reverse
, limit
, merge
,
213 actual_result_count
=rlen
, b_start
=b_start
,
215 cr
.stop_split('sort_on', None)
217 # We got some results from the indexes.
218 # Sort and convert to sequences.
219 # XXX: The check for 'values' is really stupid since we call
220 # items() and *not* values()
222 if sort_index
is None and hasattr(rs
, 'items'):
223 # having a 'items' means we have a data structure with
224 # scores. Build a new result set, sort it by score, reverse
225 # it, compute the normalized score, and Lazify it.
228 # Don't bother to sort here, return a list of
229 # three tuples to be passed later to mergeResults
230 # note that data_record_normalized_score_ cannot be
231 # calculated and will always be 1 in this case
232 getitem
= self
.__getitem
__
233 result
= [(score
, (1, score
, rid
), getitem
)
234 for rid
, score
in rs
.items()]
236 cr
.start_split('sort_on')
238 rs
= rs
.byValue(0) # sort it by score
239 max = float(rs
[0][0])
241 # Here we define our getter function inline so that
242 # we can conveniently store the max value as a default arg
243 # and make the normalized score computation lazy
244 def getScoredResult(item
, max=max, self
=self
):
246 Returns instances of self._v_brains, or whatever is
247 passed into self.useBrains.
250 r
=self
._v
_result
_class
(self
.data
[key
])\
251 .__of
__(aq_parent(self
))
252 r
.data_record_id_
= key
253 r
.data_record_score_
= score
254 r
.data_record_normalized_score_
= int(100. * score
/ max)
257 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
259 result
= LazyMap(getScoredResult
, sequence
, slen
,
260 actual_result_count
=rlen
)
261 cr
.stop_split('sort_on', None)
263 elif sort_index
is None and not hasattr(rs
, 'values'):
265 if hasattr(rs
, 'keys'):
267 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
269 result
= LazyMap(self
.__getitem
__, sequence
, slen
,
270 actual_result_count
=rlen
)
272 # sort. If there are scores, then this block is not
273 # reached, therefore 'sort-on' does not happen in the
274 # context of a text index query. This should probably
275 # sort by relevance first, then the 'sort-on' attribute.
276 cr
.start_split('sort_on')
277 result
= self
.sortResults(rs
, sort_index
, reverse
, limit
,
278 merge
, actual_result_count
=rlen
, b_start
=b_start
,
280 cr
.stop_split('sort_on', None)