diff options
| author | hukl <contact@smyck.org> | 2011-02-10 14:19:00 +0100 |
|---|---|---|
| committer | hukl <contact@smyck.org> | 2011-02-10 14:19:00 +0100 |
| commit | 7379daad1c73bd3610ed296436250b417ac3673d (patch) | |
| tree | 04f722efc678de9d3aa5bf8f1c96e3be33b18bc4 /vendor/plugins/thinking-sphinx/lib/thinking_sphinx/search.rb | |
| parent | 91633ac4419d839661e35ae8f2efe5c9089cfb67 (diff) | |
removed thinking_sphinx plugin and replaced it with gem.
also tuned dependencies
Diffstat (limited to 'vendor/plugins/thinking-sphinx/lib/thinking_sphinx/search.rb')
| -rw-r--r-- | vendor/plugins/thinking-sphinx/lib/thinking_sphinx/search.rb | 780 |
1 files changed, 0 insertions, 780 deletions
diff --git a/vendor/plugins/thinking-sphinx/lib/thinking_sphinx/search.rb b/vendor/plugins/thinking-sphinx/lib/thinking_sphinx/search.rb deleted file mode 100644 index d476787..0000000 --- a/vendor/plugins/thinking-sphinx/lib/thinking_sphinx/search.rb +++ /dev/null | |||
| @@ -1,780 +0,0 @@ | |||
| 1 | module ThinkingSphinx | ||
| 2 | # Once you've got those indexes in and built, this is the stuff that | ||
| 3 | # matters - how to search! This class provides a generic search | ||
| 4 | # interface - which you can use to search all your indexed models at once. | ||
| 5 | # Most times, you will just want a specific model's results - the search and | |
| 6 | # search_for_ids methods will do the job in exactly the same manner when | ||
| 7 | # called from a model. | ||
| 8 | # | ||
| 9 | class Search | ||
| 10 | GlobalFacetOptions = { | ||
| 11 | :all_attributes => false, | ||
| 12 | :class_facet => true | ||
| 13 | } | ||
| 14 | |||
| 15 | class << self | ||
| 16 | # Searches for results that match the parameters provided. Will only | ||
| 17 | # return the ids for the matching objects. See #search for syntax | ||
| 18 | # examples. | ||
| 19 | # | ||
| 20 | # Note that this only searches the Sphinx index, with no ActiveRecord | ||
| 21 | # queries. Thus, if your index is not in sync with the database, this | ||
| 22 | # method may return ids that no longer exist there. | ||
| 23 | # | ||
# Runs a search and returns only the ids of the matching documents.
#
# args - any number of query strings, optionally followed by an options hash
#        (the same arguments #search accepts).
#
# Returns whatever ThinkingSphinx::Collection.ids_from_results builds from the
# raw Sphinx results.
def search_for_ids(*args)
  # search_results pops the options hash off the array it receives, so give it
  # a copy and keep our own argument list intact for extract_options! below.
  results, client = search_results(*args.dup)

  options     = args.extract_options!
  page_number = options[:page] ? options[:page].to_i : 1

  ThinkingSphinx::Collection.ids_from_results(
    results, page_number, client.limit, options
  )
end
| 32 | |||
| 33 | # Searches through the Sphinx indexes for relevant matches. There's | ||
| 34 | # various ways to search, sort, group and filter - which are covered | ||
| 35 | # below. | ||
| 36 | # | ||
| 37 | # Also, if you have WillPaginate installed, the search method can be used | ||
| 38 | # just like paginate. The same parameters - :page and :per_page - work as | ||
| 39 | # expected, and the returned result set can be used by the will_paginate | ||
| 40 | # helper. | ||
| 41 | # | ||
| 42 | # == Basic Searching | ||
| 43 | # | ||
| 44 | # The simplest way of searching is straight text. | ||
| 45 | # | ||
| 46 | # ThinkingSphinx::Search.search "pat" | ||
| 47 | # ThinkingSphinx::Search.search "google" | ||
| 48 | # User.search "pat", :page => (params[:page] || 1) | ||
| 49 | # Article.search "relevant news issue of the day" | ||
| 50 | # | ||
| 51 | # If you specify :include, like in an #find call, this will be respected | ||
| 52 | # when loading the relevant models from the search results. | ||
| 53 | # | ||
| 54 | # User.search "pat", :include => :posts | ||
| 55 | # | ||
| 56 | # == Match Modes | ||
| 57 | # | ||
| 58 | # Sphinx supports 5 different matching modes. By default Thinking Sphinx | ||
| 59 | # uses :all, which unsurprisingly requires all the supplied search terms | ||
| 60 | # to match a result. | ||
| 61 | # | ||
| 62 | # Alternative modes include: | ||
| 63 | # | ||
| 64 | # User.search "pat allan", :match_mode => :any | ||
| 65 | # User.search "pat allan", :match_mode => :phrase | ||
| 66 | # User.search "pat | allan", :match_mode => :boolean | ||
| 67 | # User.search "@name pat | @username pat", :match_mode => :extended | ||
| 68 | # | ||
| 69 | # Any will find results with any of the search terms. Phrase treats the search | ||
| 70 | # terms as a single phrase instead of individual words. Boolean and extended allow | |
| 71 | # for more complex query syntax, refer to the sphinx documentation for further | ||
| 72 | # details. | ||
| 73 | # | ||
| 74 | # == Weighting | ||
| 75 | # | ||
| 76 | # Sphinx has support for weighting, where matches in one field can be considered | ||
| 77 | # more important than in another. Weights are integers, with 1 as the default. | ||
| 78 | # They can be set per-search like this: | ||
| 79 | # | ||
| 80 | # User.search "pat allan", :field_weights => { :alias => 4, :aka => 2 } | ||
| 81 | # | ||
| 82 | # If you're searching multiple models, you can set per-index weights: | ||
| 83 | # | ||
| 84 | # ThinkingSphinx::Search.search "pat", :index_weights => { User => 10 } | ||
| 85 | # | ||
| 86 | # See http://sphinxsearch.com/doc.html#weighting for further details. | ||
| 87 | # | ||
| 88 | # == Searching by Fields | ||
| 89 | # | ||
| 90 | # If you want to step it up a level, you can limit your search terms to | ||
| 91 | # specific fields: | ||
| 92 | # | ||
| 93 | # User.search :conditions => {:name => "pat"} | ||
| 94 | # | ||
| 95 | # This uses Sphinx's extended match mode, unless you specify a different | ||
| 96 | # match mode explicitly (but then this way of searching won't work). Also | ||
| 97 | # note that you don't need to put in a search string. | ||
| 98 | # | ||
| 99 | # == Searching by Attributes | ||
| 100 | # | ||
| 101 | # Also known as filters, you can limit your searches to documents that | ||
| 102 | # have specific values for their attributes. There are three ways to do | ||
| 103 | # this. The first two techniques work in all scenarios - using the :with | ||
| 104 | # or :with_all options. | ||
| 105 | # | ||
| 106 | # ThinkingSphinx::Search.search :with => {:tag_ids => 10} | ||
| 107 | # ThinkingSphinx::Search.search :with => {:tag_ids => [10,12]} | ||
| 108 | # ThinkingSphinx::Search.search :with_all => {:tag_ids => [10,12]} | ||
| 109 | # | ||
| 110 | # The first :with search will match records with a tag_id attribute of 10. | ||
| 111 | # The second :with will match records with a tag_id attribute of 10 OR 12. | ||
| 112 | # If you need to find records that are tagged with ids 10 AND 12, you | ||
| 113 | # will need to use the :with_all search parameter. This is particularly | |
| 114 | # useful in conjunction with Multi Value Attributes (MVAs). | ||
| 115 | # | ||
| 116 | # The third filtering technique is only viable if you're searching with a | ||
| 117 | # specific model (not multi-model searching). With a single model, | ||
| 118 | # Thinking Sphinx can figure out what attributes and fields are available, | ||
| 119 | # so you can put it all in the :conditions hash, and it will sort it out. | ||
| 120 | # | ||
| 121 | # Node.search :conditions => {:parent_id => 10} | ||
| 122 | # | ||
| 123 | # Filters can be single values, arrays of values, or ranges. | ||
| 124 | # | ||
| 125 | # Article.search "East Timor", :conditions => {:rating => 3..5} | ||
| 126 | # | ||
| 127 | # == Excluding by Attributes | ||
| 128 | # | ||
| 129 | # Sphinx also supports negative filtering - where the filters are of | ||
| 130 | # attribute values to exclude. This is done with the :without option: | ||
| 131 | # | ||
| 132 | # User.search :without => {:role_id => 1} | ||
| 133 | # | ||
| 134 | # == Excluding by Primary Key | ||
| 135 | # | ||
| 136 | # There is a shortcut to exclude records by their ActiveRecord primary key: | ||
| 137 | # | ||
| 138 | # User.search :without_ids => 1 | ||
| 139 | # | ||
| 140 | # Pass an array or a single value. | ||
| 141 | # | ||
| 142 | # The primary key must be an integer as a negative filter is used. Note | ||
| 143 | # that for multi-model search, an id may occur in more than one model. | ||
| 144 | # | ||
| 145 | # == Infix (Star) Searching | ||
| 146 | # | ||
| 147 | # By default, Sphinx uses English stemming, e.g. matching "shoes" if you | ||
| 148 | # search for "shoe". It won't find "Melbourne" if you search for | ||
| 149 | # "elbourn", though. | ||
| 150 | # | ||
| 151 | # Enable infix searching by something like this in config/sphinx.yml: | ||
| 152 | # | ||
| 153 | # development: | ||
| 154 | # enable_star: 1 | ||
| 155 | # min_infix_length: 2 | ||
| 156 | # | ||
| 157 | # Note that this will make indexing take longer. | ||
| 158 | # | ||
| 159 | # With those settings (and after reindexing), wildcard asterisks can be used | ||
| 160 | # in queries: | ||
| 161 | # | ||
| 162 | # Location.search "*elbourn*" | ||
| 163 | # | ||
| 164 | # To automatically add asterisks around every token (but not operators), | ||
| 165 | # pass the :star option: | ||
| 166 | # | ||
| 167 | # Location.search "elbourn -ustrali", :star => true, :match_mode => :boolean | ||
| 168 | # | ||
| 169 | # This would become "*elbourn* -*ustrali*". The :star option only adds the | ||
| 170 | # asterisks. You need to make the config/sphinx.yml changes yourself. | ||
| 171 | # | ||
| 172 | # By default, the tokens are assumed to match the regular expression /\w+/u. | ||
| 173 | # If you've modified the charset_table, pass another regular expression, e.g. | ||
| 174 | # | ||
| 175 | # User.search("oo@bar.c", :star => /[\w@.]+/u) | ||
| 176 | # | ||
| 177 | # to search for "*oo@bar.c*" and not "*oo*@*bar*.*c*". | ||
| 178 | # | ||
| 179 | # == Sorting | ||
| 180 | # | ||
| 181 | # Sphinx can only sort by attributes, so generally you will need to avoid | ||
| 182 | # using field names in your :order option. However, if you're searching | ||
| 183 | # on a single model, and have specified some fields as sortable, you can | ||
| 184 | # use those field names and Thinking Sphinx will interpret accordingly. | ||
| 185 | # Remember: this will only happen for single-model searches, and only | ||
| 186 | # through the :order option. | ||
| 187 | # | ||
| 188 | # Location.search "Melbourne", :order => :state | ||
| 189 | # User.search :conditions => {:role_id => 2}, :order => "name ASC" | ||
| 190 | # | ||
| 191 | # Keep in mind that if you use a string, you *must* specify the direction | ||
| 192 | # (ASC or DESC) else Sphinx won't return any results. If you use a symbol | ||
| 193 | # then Thinking Sphinx assumes ASC, but if you wish to state otherwise, | ||
| 194 | # use the :sort_mode option: | ||
| 195 | # | ||
| 196 | # Location.search "Melbourne", :order => :state, :sort_mode => :desc | ||
| 197 | # | ||
| 198 | # Of course, there are other sort modes - check out the Sphinx | ||
| 199 | # documentation[http://sphinxsearch.com/doc.html] for that level of | ||
| 200 | # detail though. | ||
| 201 | # | ||
| 202 | # If desired, you can sort by a column in your model instead of a sphinx | ||
| 203 | # field or attribute. This sort only applies to the current page, so is | ||
| 204 | # most useful when performing a search with a single page of results. | ||
| 205 | # | ||
| 206 | # User.search("pat", :sql_order => "name") | ||
| 207 | # | ||
| 208 | # == Grouping | ||
| 209 | # | ||
| 210 | # For this you can use the group_by, group_clause and group_function | ||
| 211 | # options - which are all directly linked to Sphinx's expectations. No | ||
| 212 | # magic from Thinking Sphinx. It can get a little tricky, so make sure | ||
| 213 | # you read all the relevant | ||
| 214 | # documentation[http://sphinxsearch.com/doc.html#clustering] first. | ||
| 215 | # | ||
| 216 | # Grouping is done via three parameters within the options hash | ||
| 217 | # * <tt>:group_function</tt> determines the way grouping is done | ||
| 218 | # * <tt>:group_by</tt> determines the field which is used for grouping | ||
| 219 | # * <tt>:group_clause</tt> determines the sorting order | ||
| 220 | # | ||
| 221 | # === group_function | ||
| 222 | # | ||
| 223 | # Valid values for :group_function are | ||
| 224 | # * <tt>:day</tt>, <tt>:week</tt>, <tt>:month</tt>, <tt>:year</tt> - Grouping is done by the respective timeframes. | ||
| 225 | # * <tt>:attr</tt>, <tt>:attrpair</tt> - Grouping is done by the specified attribute(s) | |
| 226 | # | ||
| 227 | # === group_by | ||
| 228 | # | ||
| 229 | # This parameter denotes the field by which grouping is done. Note that the | ||
| 230 | # specified field must be a sphinx attribute or index. | ||
| 231 | # | ||
| 232 | # === group_clause | ||
| 233 | # | ||
| 234 | # This determines the sorting order of the groups. In a grouping search, | ||
| 235 | # the matches within a group will be sorted by the <tt>:sort_mode</tt> and <tt>:order</tt> parameters. | |
| 236 | # The group matches themselves however, will be sorted by <tt>:group_clause</tt>. | ||
| 237 | # | ||
| 238 | # The syntax for this is the same as an order parameter in extended sort mode. | ||
| 239 | # Namely, you can specify an SQL-like sort expression with up to 5 attributes | ||
| 240 | # (including internal attributes), eg: "@relevance DESC, price ASC, @id DESC" | ||
| 241 | # | ||
| 242 | # === Grouping by timestamp | ||
| 243 | # | ||
| 244 | # Timestamp grouping groups off items by the day, week, month or year of the | ||
| 245 | # attribute given. In order to do this you need to define a timestamp attribute, | ||
| 246 | # which pretty much looks like the standard definition for any attribute. | |
| 247 | # | ||
| 248 | # define_index do | ||
| 249 | # # | ||
| 250 | # # All your other stuff | ||
| 251 | # # | ||
| 252 | # has :created_at | ||
| 253 | # end | ||
| 254 | # | ||
| 255 | # When you need to fire off your search, it'll go something to the tune of | ||
| 256 | # | ||
| 257 | # Fruit.search "apricot", :group_function => :day, :group_by => 'created_at' | ||
| 258 | # | ||
| 259 | # The <tt>@groupby</tt> special attribute will contain the date for that group. | ||
| 260 | # Depending on the <tt>:group_function</tt> parameter, the date format will be | ||
| 261 | # | ||
| 262 | # * <tt>:day</tt> - YYYYMMDD | ||
| 263 | # * <tt>:week</tt> - YYYYNNN (NNN is the first day of the week in question, | ||
| 264 | # counting from the start of the year ) | ||
| 265 | # * <tt>:month</tt> - YYYYMM | ||
| 266 | # * <tt>:year</tt> - YYYY | ||
| 267 | # | ||
| 268 | # | ||
| 269 | # === Grouping by attribute | ||
| 270 | # | ||
| 271 | # The syntax is the same as grouping by timestamp, except for the fact that the | ||
| 272 | # <tt>:group_function</tt> parameter is changed | ||
| 273 | # | ||
| 274 | # Fruit.search "apricot", :group_function => :attr, :group_by => 'size' | ||
| 275 | # | ||
| 276 | # | ||
| 277 | # == Geo/Location Searching | ||
| 278 | # | ||
| 279 | # Sphinx - and therefore Thinking Sphinx - has the facility to search | ||
| 280 | # around a geographical point, using a given latitude and longitude. To | ||
| 281 | # take advantage of this, you will need to have both of those values in | ||
| 282 | # attributes. To search with that point, you can then use one of the | ||
| 283 | # following syntax examples: | ||
| 284 | # | ||
| 285 | # Address.search "Melbourne", :geo => [1.4, -2.217], :order => "@geodist asc" | ||
| 286 | # Address.search "Australia", :geo => [-0.55, 3.108], :order => "@geodist asc", | |
| 287 | # :latitude_attr => "latit", :longitude_attr => "longit" | ||
| 288 | # | ||
| 289 | # The first example applies when your latitude and longitude attributes | ||
| 290 | # are named any of lat, latitude, lng, lon, long or longitude. If that's not | |
| 291 | # the case, you will need to explicitly state them in your search, _or_ | ||
| 292 | # you can do so in your model: | ||
| 293 | # | ||
| 294 | # define_index do | ||
| 295 | # has :latit # Float column, stored in radians | ||
| 296 | # has :longit # Float column, stored in radians | ||
| 297 | # | ||
| 298 | # set_property :latitude_attr => "latit" | ||
| 299 | # set_property :longitude_attr => "longit" | ||
| 300 | # end | ||
| 301 | # | ||
| 302 | # Now, geo-location searching really only has an effect if you have a | |
| 303 | # filter, sort or grouping clause related to it - otherwise it's just a | ||
| 304 | # normal search, and _will not_ return a distance value otherwise. To | ||
| 305 | # make use of the positioning difference, use the special attribute | ||
| 306 | # "@geodist" in any of your filters or sorting or grouping clauses. | ||
| 307 | # | ||
| 308 | # And don't forget - both the latitude and longitude you use in your | ||
| 309 | # search, and the values in your indexes, need to be stored as a float in radians, | ||
| 310 | # _not_ degrees. Keep in mind that if you do this conversion in SQL | ||
| 311 | # you will need to explicitly declare a column type of :float. | ||
| 312 | # | ||
| 313 | # define_index do | ||
| 314 | # has 'RADIANS(lat)', :as => :lat, :type => :float | ||
| 315 | # # ... | ||
| 316 | # end | ||
| 317 | # | ||
| 318 | # Once you've got your results set, you can access the distances as | ||
| 319 | # follows: | ||
| 320 | # | ||
| 321 | # @results.each_with_geodist do |result, distance| | ||
| 322 | # # ... | ||
| 323 | # end | ||
| 324 | # | ||
| 325 | # The distance value is returned as a float, representing the distance in | ||
| 326 | # metres. | ||
| 327 | # | ||
| 328 | # == Handling a Stale Index | ||
| 329 | # | ||
| 330 | # Especially if you don't use delta indexing, you risk having records in the | ||
| 331 | # Sphinx index that are no longer in the database. By default, those will simply | ||
| 332 | # come back as nils: | ||
| 333 | # | ||
| 334 | # >> pat_user.delete | ||
| 335 | # >> User.search("pat") | ||
| 336 | # Sphinx Result: [1,2] | ||
| 337 | # => [nil, <#User id: 2>] | ||
| 338 | # | ||
| 339 | # (If you search across multiple models, you'll get ActiveRecord::RecordNotFound.) | ||
| 340 | # | ||
| 341 | # You can simply Array#compact these results or handle the nils in some other way, but | ||
| 342 | # Sphinx will still report two results, and the missing records may upset your layout. | ||
| 343 | # | ||
| 344 | # If you pass :retry_stale => true to a single-model search, missing records will | ||
| 345 | # cause Thinking Sphinx to retry the query but excluding those records. Since search | ||
| 346 | # is paginated, the new search could potentially include missing records as well, so by | ||
| 347 | # default Thinking Sphinx will retry three times. Pass :retry_stale => 5 to retry five | ||
| 348 | # times, and so on. If there are still missing ids on the last retry, they are | ||
| 349 | # shown as nils. | ||
| 350 | # | ||
# Searches the Sphinx indexes and wraps the matches in a collection.
#
# args - any number of query strings, optionally followed by an options hash.
#
# Returns the result of ThinkingSphinx::Collection.create_from_results. The
# whole query runs inside retry_search_on_stale_index, so it may be executed
# more than once when :retry_stale is given.
def search(*args)
  terms   = args.dup # work on a copy so the caller's argument list is untouched
  options = terms.extract_options!

  retry_search_on_stale_index(terms, options) do
    results, client = search_results(*(terms + [options]))

    # Sphinx reports query problems inside the result hash rather than raising.
    if results[:error]
      ::ActiveRecord::Base.logger.error(
        "Sphinx Error: #{results[:error]}"
      )
    end

    current_page = options[:page] ? options[:page].to_i : 1

    ThinkingSphinx::Collection.create_from_results(
      results, current_page, client.limit, options
    )
  end
end
| 368 | |||
# Runs the given block, re-running it when it raises StaleIdsException.
#
# query   - the query terms (not used by this method itself).
# options - the search options hash; updated in place:
#           :raise_on_stale is set before every attempt (true only while
#           retries remain) and the stale ids are merged into :without_ids
#           before each retry.
# block   - performs the actual search.
#
# The number of retries comes from options[:retry_stale]: true means three,
# nil/false means none, anything else is converted with to_i.
#
# Returns the value of the block.
def retry_search_on_stale_index(query, options, &block)
  requested = options[:retry_stale]
  retries_remaining =
    if !requested
      0 # nil or false: no retries
    elsif true == requested
      3 # a bare `true` means the default of three retries
    else
      requested.to_i
    end
  stale_ids_seen = []

  begin
    # Passed through the options so the code building the collection can see
    # it: stale records should only raise while there is a retry left.
    options[:raise_on_stale] = retries_remaining > 0
    block.call
  rescue StaleIdsException => stale
    retries_remaining -= 1

    stale_ids_seen |= stale.ids # kept only for the log message
    options[:without_ids] = Array(options[:without_ids]) | stale.ids # the actual exclusion

    noun = retries_remaining == 1 ? 'try' : 'tries'
    ::ActiveRecord::Base.logger.debug("Sphinx Stale Ids (%s %s left): %s" % [
      retries_remaining, noun, stale_ids_seen.join(', ')
    ])

    retry
  end
end
| 400 | |||
# Returns the :total_found value of the raw search results for the given
# search arguments, or 0 when the results carry no such value.
def count(*args)
  # search_results returns [results, client]; only the results are needed.
  results = search_results(*args.dup).first
  results[:total_found] || 0
end
| 405 | |||
| 406 | # Checks if a document with the given id exists within a specific index. | ||
| 407 | # Expected parameters: | ||
| 408 | # | ||
| 409 | # - ID of the document | ||
| 410 | # - Index to check within | ||
| 411 | # - Options hash (defaults to {}) | ||
| 412 | # | ||
| 413 | # Example: | ||
| 414 | # | ||
| 415 | # ThinkingSphinx::Search.search_for_id(10, "user_core", :class => User) | ||
| 416 | # | ||
# Checks whether a document id is present in a given index.
#
# args - the document id, the index to query, and an optional options hash.
#
# Returns true when the query yields at least one match.
# Raises ThinkingSphinx::ConnectionError when the connection is refused.
def search_for_id(*args)
  options = args.extract_options!
  client  = client_from_options options

  conditions_query, filters = search_conditions(
    options[:class], options[:conditions] || {}
  )
  client.filters += filters
  # Field conditions produce "@field value" syntax, which needs extended mode.
  client.match_mode = :extended unless conditions_query.empty?
  # Restrict the search to exactly the requested document id.
  client.id_range = args.first..args.first

  begin
    matches = client.query(conditions_query, args[1])[:matches]
    return matches.length > 0
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end
end
| 434 | |||
| 435 | # Model.facets *args | ||
| 436 | # ThinkingSphinx::Search.facets *args | ||
| 437 | # ThinkingSphinx::Search.facets *args, :all_attributes => true | ||
| 438 | # ThinkingSphinx::Search.facets *args, :class_facet => false | ||
| 439 | # | ||
# Dispatches a facet search: to facets_for_model when the options name a
# :class, otherwise to facets_for_all_models.
#
# args - search arguments, optionally followed by an options hash.
def facets(*args)
  options = args.extract_options!
  model   = options[:class]

  return facets_for_model(model, args, options) if model

  facets_for_all_models args, options
end
| 449 | |||
| 450 | private | ||
| 451 | |||
| 452 | # This method handles the common search functionality, and returns both | ||
| 453 | # the result hash and the client. Not super elegant, but it'll do for | ||
| 454 | # the moment. | ||
| 455 | # | ||
# Performs the search shared by the public search methods.
#
# args - query strings, optionally followed by an options hash.
#
# Returns a two-element array: the raw result hash from the client and the
# client itself.
# Raises ThinkingSphinx::ConnectionError when the connection is refused.
def search_results(*args)
  options = args.extract_options!
  terms   = args.join(' ')
  client  = client_from_options options

  terms = star_query(terms, options[:star]) if options[:star]

  conditions_query, filters = search_conditions(
    options[:class], options[:conditions] || {}
  )
  client.filters += filters
  client.match_mode = :extended unless conditions_query.empty?

  # Strip the joined query, because "" and " " are not equivalent.
  query = "#{terms} #{conditions_query}".strip

  set_sort_options! client, options

  # Pagination: pages are 1-based and anything below 1 is treated as page 1.
  client.limit = options[:per_page].to_i if options[:per_page]
  requested_page = options[:page] ? options[:page].to_i : 1
  page           = requested_page <= 0 ? 1 : requested_page
  client.offset  = (page - 1) * client.limit

  begin
    ::ActiveRecord::Base.logger.debug "Sphinx: #{query}"
    results = client.query query
    matched_ids = results[:matches].collect { |match|
      match[:attributes]["sphinx_internal_id"]
    }
    ::ActiveRecord::Base.logger.debug "Sphinx Result: #{matched_ids.inspect}"
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end

  [results, client]
end
| 488 | |||
| 489 | # Set all the appropriate settings for the client, using the provided | ||
| 490 | # options hash. | ||
| 491 | # | ||
# Builds a Riddle::Client configured from the given search options.
#
# options - the search options hash. It is updated in place: :max_matches may
#           be defaulted from the searchd configuration, class keys in
#           :index_weights are expanded to "<name>_core"/"<name>_delta", and
#           :classes is set when :class is given.
#
# Every filter is added with `+=` (a new array) rather than `<<`. After the
# settings loop below, client.filters can be the very array the caller passed
# as options[:filters] or the one stored in the model's index options, and
# appending in place would make filters pile up on that shared array across
# searches and stale-index retries.
#
# Returns the configured client.
def client_from_options(options = {})
  config        = ThinkingSphinx::Configuration.instance
  client        = Riddle::Client.new config.address, config.port
  klass         = options[:class]
  index_options = klass ? klass.sphinx_index_options : {}

  # The Riddle default is per-query max_matches=1000. If we set the
  # per-server max to a smaller value in sphinx.yml, we need to override
  # the Riddle default or else we get search errors like
  # "per-query max_matches=1000 out of bounds (per-server max_matches=200)"
  per_server_max_matches = config.configuration.searchd.max_matches
  options[:max_matches] ||= per_server_max_matches if per_server_max_matches

  # Turn :index_weights => { "foo" => 2, User => 1 }
  # into :index_weights => { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
  if (weights = options[:index_weights])
    options[:index_weights] = weights.inject({}) do |expanded, (index, weight)|
      if index.is_a?(Class)
        name = ThinkingSphinx::Index.name(index)
        expanded["#{name}_core"]  = weight
        expanded["#{name}_delta"] = weight
      else
        expanded[index] = weight
      end
      expanded
    end
  end

  # For each setting: explicit option first, then the model's index option,
  # and finally whatever the client already has.
  [
    :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
    :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
    :retry_count, :retry_delay, :index_weights, :rank_mode,
    :max_query_time, :field_weights, :filters, :anchor, :limit
  ].each do |key|
    client.send(
      "#{key}=",
      options[key] || index_options[key] || client.send(key)
    )
  end

  options[:classes] = [klass] if klass

  client.anchor = anchor_conditions(klass, options) || {} if client.anchor.empty?

  # only match documents whose sphinx_deleted attribute is 0
  client.filters += [
    Riddle::Client::Filter.new("sphinx_deleted", [0])
  ]

  # class filters
  if options[:classes]
    client.filters += [
      Riddle::Client::Filter.new(
        "class_crc", options[:classes].collect { |k| k.to_crc32s }.flatten
      )
    ]
  end

  # normal attribute filters
  client.filters += options[:with].collect { |attr, val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val)
  } if options[:with]

  # exclusive attribute filters
  client.filters += options[:without].collect { |attr, val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val), true
  } if options[:without]

  # every-match attribute filters: one filter per value, so all must match
  client.filters += options[:with_all].collect { |attr, vals|
    Array(vals).collect { |val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val)
    }
  }.flatten if options[:with_all]

  # exclusive attribute filter on primary key
  client.filters += Array(options[:without_ids]).collect { |id|
    Riddle::Client::Filter.new 'sphinx_internal_id', filter_value(id), true
  } if options[:without_ids]

  client
end
| 570 | |||
# Wraps every token of the query in asterisks for wildcard searching.
#
# query        - the query string.
# custom_token - optional Regexp describing what a token looks like; any
#                non-Regexp value (such as true) selects the default /\w+/u.
#
# Tokens are left untouched when they directly follow an operator character
# (@, ~ or /), when they are a double-quoted phrase, or when they already have
# an asterisk on either side.
#
# Uses the core String#start_with?/#end_with? and Regexp.last_match instead of
# the ActiveSupport aliases and the $`/$&/$' globals, so the method no longer
# depends on ActiveSupport's String extensions being loaded.
#
# Returns a new string; the query passed in is not modified.
def star_query(query, custom_token = nil)
  token = custom_token.is_a?(Regexp) ? custom_token : /\w+/u

  query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
    # Capture the match data first: the operator check below runs another
    # regexp match and would overwrite it.
    match  = Regexp.last_match
    before = match.pre_match
    word   = match[0]
    after  = match.post_match

    operator = before =~ %r{(\W|^)[@~/]\Z} # E.g. "@foo", "/2", "~3", but not as part of a token
    quoted   = word.start_with?('"') && word.end_with?('"') # E.g. "foo bar", with quotes
    starred  = before.end_with?("*") || after.start_with?("*")

    operator || quoted || starred ? word : "*#{word}*"
  end
end
| 586 | |||
# Normalises a filter value into the form handed to Riddle::Client::Filter.
#
# value - a Range, an Array, a single Time or any other single value.
#
# Returns:
# * Range - returned unchanged, unless it starts with a Time, in which case a
#           range of integer timestamps is built from its two ends;
# * Array - a new array with every Time element replaced by its timestamp;
# * Time  - a one-element array holding its timestamp;
# * other - Array(value).
def filter_value(value)
  case value
  when Range
    value.first.is_a?(Time) ? timestamp(value.first)..timestamp(value.last) : value
  when Array
    value.collect { |val| val.is_a?(Time) ? timestamp(val) : val }
  when Time
    # Must not fall through to Array(value): Time responds to to_a, so that
    # would yield [sec, min, hour, ...] instead of a single timestamp.
    [timestamp(value)]
  else
    Array(value)
  end
end
| 597 | |||
| 598 | # Returns the integer timestamp for a Time object. | ||
| 599 | # | ||
| 600 | # If using Rails 2.1+, need to handle timezones to translate them back to | ||
| 601 | # UTC, as that's what datetimes will be stored as by MySQL. | ||
| 602 | # | ||
| 603 | # in_time_zone is a method that was added for the timezone support in | ||
| 604 | # Rails 2.1, which is why it's used for testing. I'm sure there's better | ||
| 605 | # ways, but this does the job. | ||
| 606 | # | ||
# value - a Time-like object.
#
# Returns its integer timestamp (value.to_i, taken after conversion to UTC
# when the object responds to in_time_zone).
#
# getutc is used rather than utc: Time#utc converts the receiver itself, which
# would change the caller's object as a side effect of building a filter and
# raise on a frozen non-UTC Time. getutc returns a converted copy instead.
def timestamp(value)
  value.respond_to?(:in_time_zone) ? value.getutc.to_i : value.to_i
end
| 610 | |||
| 611 | # Translate field and attribute conditions to the relevant search string | ||
| 612 | # and filters. | ||
| 613 | # | ||
# Splits a conditions hash into a field query string and attribute filters.
#
# klass      - the model being searched, or nil. Its indexes supply the list
#              of attribute names.
# conditions - hash of name => value.
#
# A condition whose name (as a symbol) is one of the model's attributes becomes
# a Riddle::Client::Filter; every other condition becomes an "@name value"
# query term.
#
# Returns a two-element array: the terms joined by spaces, and the filters.
def search_conditions(klass, conditions={})
  attribute_names = []
  if klass
    attribute_names = klass.sphinx_indexes.collect { |index|
      index.attributes.collect { |attrib| attrib.unique_name }
    }.flatten
  end

  terms   = []
  filters = []

  conditions.each do |name, value|
    unless attribute_names.include?(name.to_sym)
      terms << "@#{name} #{value}"
      next
    end

    filters << Riddle::Client::Filter.new(name.to_s, filter_value(value))
  end

  [terms.join(' '), filters]
end
| 634 | |||
| 635 | # Return the appropriate latitude and longitude values, depending on | ||
| 636 | # whether the relevant attributes have been defined, and also whether | ||
| 637 | # there's actually any values. | ||
| 638 | # | ||
# Builds the geographic anchor settings for a search.
#
# klass   - the model being searched, or nil.
# options - the search options. :geo ([lat, lon]) takes precedence over the
#           separate :lat / :lon options; :latitude_attr / :longitude_attr
#           override the attribute names.
#
# The attribute names are resolved in this order: explicit option, the first
# index option of the same name, then the first matching conventionally named
# attribute (:lat, :latitude / :lng, :lon, :long, :longitude).
#
# Returns a hash with :latitude_attribute, :latitude, :longitude_attribute and
# :longitude, or nil when no latitude/longitude pair was supplied.
def anchor_conditions(klass, options)
  indexes = klass ? klass.sphinx_indexes : []

  attributes = indexes.collect { |index|
    index.attributes.collect { |attrib| attrib.unique_name }
  }.flatten

  lat_attr = indexes.collect { |index| index.options[:latitude_attr] }.compact.first
  lon_attr = indexes.collect { |index| index.options[:longitude_attr] }.compact.first

  lat_attr = options[:latitude_attr] || lat_attr
  [:lat, :latitude].each do |name|
    lat_attr ||= name if attributes.include?(name)
  end

  lon_attr = options[:longitude_attr] || lon_attr
  [:lng, :lon, :long, :longitude].each do |name|
    lon_attr ||= name if attributes.include?(name)
  end

  point = options[:geo]
  lat, lon = point ? [point.first, point.last] : [options[:lat], options[:lon]]

  return nil unless lat && lon

  {
    :latitude_attribute  => lat_attr.to_s,
    :latitude            => lat,
    :longitude_attribute => lon_attr.to_s,
    :longitude           => lon
  }
end
| 677 | |||
| 678 | # Set the sort options using the :order key as well as the appropriate | ||
| 679 | # Riddle settings. | ||
| 680 | # | ||
def set_sort_options!(client, options)
  model         = options[:class]
  field_names   = []
  index_options = {}

  # Field names and index-level defaults are only known when a model
  # class was supplied with the search.
  if model
    field_names = model.sphinx_indexes.map { |index|
      index.fields.map { |field| field.unique_name }
    }.flatten
    index_options = model.sphinx_index_options
  end

  requested = options[:order] || index_options[:order]

  if requested.is_a?(Symbol)
    # A bare symbol sorts by one column; only pick a default direction
    # when the client has none or is still on relevance ordering.
    current_mode = client.sort_mode
    client.sort_mode = :attr_asc if current_mode.nil? || current_mode == :relevance
    # Names matching an indexed field get the "_sort" suffix.
    client.sort_by = field_names.include?(requested) ? "#{requested}_sort" : requested.to_s
  elsif requested.is_a?(String)
    # A string is handed over as an extended sort expression.
    client.sort_mode = :extended
    client.sort_by   = sorted_fields_to_attributes(requested, field_names)
  end

  # Translate the shorthand :asc / :desc modes into the attribute modes.
  client.sort_mode = :attr_asc if client.sort_mode == :asc
  if client.sort_mode == :desc
    client.sort_mode = :attr_desc
  end
end
| 707 | |||
| 708 | # Search through a collection of fields and translate any appearances | ||
| 709 | # of them in a string to their attribute equivalent for sorting. | ||
| 710 | # | ||
def sorted_fields_to_attributes(string, fields)
  # Work on a copy: the incoming string is the caller's :order value and
  # must not be rewritten in place (it may also be frozen).
  fields.inject(string.dup) do |clause, field|
    name = Regexp.escape(field.to_s)

    # A field counts only as a whole word: preceded by start-of-line or
    # whitespace, followed by an optional comma plus whitespace, or by
    # end-of-line. The trailing part is a lookahead so the separator is
    # not consumed and stays available to the next match.
    clause.gsub(/(^|\s)#{name}(?=,?\s|$)/) do
      "#{Regexp.last_match(1)}#{field}_sort"
    end
  end
end
| 720 | |||
# Builds a FacetCollection for a single model by running one grouped
# search per facet of +klass+ and adding each result set to the
# collection. The :class facet is skipped unless options[:class_facet]
# is set. Returns the collection.
def facets_for_model(klass, args, options)
  collection = ThinkingSphinx::FacetCollection.new(args + [options])

  # Grouping settings live on a copy, so the options handed to the
  # collection above stay as the caller supplied them.
  grouped_options = options.clone.merge!(:group_function => :attr)

  klass.sphinx_facets.each do |facet|
    next if facet.name == :class && !grouped_options[:class_facet]

    grouped_options[:group_by] = facet.attribute_name
    collection.add_from_results(facet, search(*(args + [grouped_options])))
  end

  collection
end
| 734 | |||
# Builds a FacetCollection across models: the given options are layered
# over GlobalFacetOptions, then one grouped search is run per name
# returned by facet_names and each result set is added to the
# collection. Returns the collection.
def facets_for_all_models(args, options)
  options    = GlobalFacetOptions.merge(options)
  collection = ThinkingSphinx::FacetCollection.new(args + [options])

  # Keep the grouping keys on a separate hash. Merging them in place
  # would also alter the options already handed to the collection above,
  # adding :group_function and :group_by after the fact;
  # facets_for_model likewise works on a copy.
  grouped_options = options.merge(:group_function => :attr)

  facet_names(grouped_options).each do |name|
    grouped_options[:group_by] = name
    collection.add_from_results(name, search(*(args + [grouped_options])))
  end

  collection
end
| 746 | |||
# Returns options[:classes] when given; otherwise calls constantize on
# each entry of ThinkingSphinx.indexed_models and returns the results.
def facet_classes(options)
  requested = options[:classes]
  return requested if requested

  ThinkingSphinx.indexed_models.map { |model_name| model_name.constantize }
end
| 752 | |||
# Returns the facet attribute names to group by for the classes chosen
# by facet_classes: every name when options[:all_attributes] is set,
# otherwise only the names common to all classes. "class_crc" is removed
# unless options[:class_facet] is set.
def facet_names(options)
  classes = facet_classes(options)

  names = if options[:all_attributes]
    facet_names_for_all_classes(classes)
  else
    facet_names_common_to_all_classes(classes)
  end

  names.delete("class_crc") unless options[:class_facet]
  names
end
| 762 | |||
# Returns the distinct facet attribute names found on any of the given
# classes, in order of first appearance.
def facet_names_for_all_classes(classes)
  names_per_class = classes.map do |klass|
    klass.sphinx_facets.map { |facet| facet.attribute_name }
  end

  names_per_class.flatten.uniq
end
| 768 | |||
# Returns only those facet attribute names that every one of the given
# classes defines a facet for.
def facet_names_common_to_all_classes(classes)
  candidates = facet_names_for_all_classes(classes)

  candidates.select do |name|
    classes.all? do |klass|
      klass.sphinx_facets.any? { |facet| facet.attribute_name == name }
    end
  end
end
| 778 | end | ||
| 779 | end | ||
| 780 | end | ||
