summaryrefslogtreecommitdiff
path: root/vendor/plugins/thinking-sphinx/lib/thinking_sphinx/attribute.rb
blob: 1d45b2e90e5f1eaf2df051232c0bbbf6292b1218 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
module ThinkingSphinx
  # Attributes - eternally useful when it comes to filtering, sorting or
  # grouping. This class isn't really useful to you unless you're hacking
  # around with the internals of Thinking Sphinx - but hey, don't let that
  # stop you.
  #
  # One key thing to remember - if you're using the attribute manually to
  # generate SQL statements, you'll need to set the base model, and all the
  # associations. Which can get messy. Use Index.link!, it really helps.
  # 
  class Attribute
    attr_accessor :alias, :columns, :associations, :model, :faceted, :source
    
    # To create a new attribute, you'll need to pass in either a single Column
    # or an array of them, and some (optional) options.
    #
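    # Valid options are:
    # - :as     => :alias_name
    # - :type   => :attribute_type
    # - :source => :field, :query, :ranged_query
    # - :facet  => true (to_facet then returns a ThinkingSphinx::Facet)
    # - :crc    => true (the value is passed through the adapter's crc
    #              function, and a :string type becomes :integer)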
    #
    # Alias is only required in three circumstances: when there's
    # another attribute or field with the same name, when the column name is
    # 'id', or when there's more than one column.
    # 
    # Type is not required, unless you want to force a column to be a certain
    # type (but keep in mind the value will not be CASTed in the SQL
    # statements). The only time you really need to use this is when the type
    # can't be figured out by the column - ie: when not actually using a
    # database column as your source.
    # 
    # Source is only used for multi-value attributes (MVA). By default this will
    # use a left-join and a group_concat to obtain the values. For better performance
    # during indexing it can be beneficial to let Sphinx use a separate query to retrieve
    # all document/value pairs.
    # Either :query or :ranged_query will enable this feature; :ranged_query will also
    # cause the query to be executed incrementally.
    #
    # Example usage:
    #
    #   Attribute.new(
    #     Column.new(:created_at)
    #   )
    #
    #   Attribute.new(
    #     Column.new(:posts, :id),
    #     :as => :post_ids
    #   )
    #
    #   Attribute.new(
    #     Column.new(:posts, :id),
    #     :as => :post_ids,
    #     :source => :ranged_query
    #   )
    #
    #   Attribute.new(
    #     [Column.new(:pages, :id), Column.new(:articles, :id)],
    #     :as => :content_ids
    #   )
    #
    #   Attribute.new(
    #     Column.new("NOW()"),
    #     :as   => :indexed_at,
    #     :type => :datetime
    #   )
    #
    # If you're creating attributes for latitude and longitude, don't forget
    # that Sphinx expects these values to be in radians.
    #  
    def initialize(columns, options = {})
      # Array() lets callers pass either a single Column or a list of them.
      @columns      = Array(columns)
      # Starts empty; filled in afterwards through the associations accessor.
      @associations = {}
      
      # Anything that does not respond to __stack is not treated as a Column
      # (e.g. the result of a bare reserved word such as `id`), so fail early
      # with a message that points at the likely cause.
      if @columns.empty? || @columns.any? { |column| !column.respond_to?(:__stack) }
        raise "Cannot define an attribute with no columns. Maybe you are trying to index an attribute with a reserved name (id, name). You can fix this error by using a symbol rather than a bare name (:id instead of id)."
      end
      
      @alias    = options[:as]
      @type     = options[:type]
      @faceted  = options[:facet]
      @source   = options[:source]
      @crc      = options[:crc]
      
      # Supplying a :source implies a multi-value attribute unless a type was
      # given explicitly.
      @type   ||= :multi    unless @source.nil?
      # With :crc set, an explicit :string type is stored as :integer.
      @type     = :integer  if @type == :string && @crc
    end
    
    # Builds this attribute's fragment of the SELECT clause. The model and
    # associations need to be set before calling this.
    #
    # Returns nil when include_as_association? is false. Otherwise the prefixed
    # column references are joined with ', ' and passed through whichever
    # adapter transformations apply (concatenation, group concatenation,
    # datetime casting, null conversion, CRC) before being aliased to the
    # quoted unique name.
    # 
    def to_select_sql
      return nil unless include_as_association?
      
      references = @columns.collect { |column| column_with_prefix(column) }
      sql        = references.join(', ')
      
      # Integer values are separated by commas, everything else by spaces.
      glue = all_ints? ? ',' : ' '
      
      sql = adapter.concatenate(sql, glue)       if concat_ws?
      sql = adapter.group_concatenate(sql, glue) if is_many?
      sql = adapter.cast_to_datetime(sql)        if type == :datetime
      sql = adapter.convert_nulls(sql)           if type == :string
      sql = adapter.crc(sql)                     if @crc
      
      "#{sql} AS #{quote_column(unique_name)}"
    end
    
    # Builds this attribute's contribution to the GROUP BY clause, as an array
    # of prefixed column references (one per column).
    #
    # Returns nil when no grouping is needed: the attribute has many values,
    # all of its columns are raw strings, or ThinkingSphinx reports that the
    # GROUP BY shortcut is in use.
    # 
    def to_group_sql
      return nil if is_many? || is_string? || ThinkingSphinx.use_group_by_shortcut?
      
      @columns.collect { |column| column_with_prefix(column) }
    end
    
    # Maps the attribute's type to the symbol naming the matching sql_attr_*
    # configuration setting. Returns nil for any type not listed here.
    def type_to_config
      case type
      when :multi    then :sql_attr_multi
      when :datetime then :sql_attr_timestamp
      when :string   then :sql_attr_str2ordinal
      when :float    then :sql_attr_float
      when :boolean  then :sql_attr_bool
      when :integer  then :sql_attr_uint
      end
    end
    
    # True unless this is a :multi attribute whose source is :query or
    # :ranged_query. When false, to_select_sql returns nil and config_value
    # builds its value from source_value instead.
    def include_as_association?
      type != :multi || (source != :query && source != :ranged_query)
    end
    
    # Returns the value to write into the configuration for this attribute.
    #
    # For everything except multi-value attributes this is just the unique
    # name. Multi-value attributes produce "uint <name> from <source>", where
    # the source is "field" when the values come through the main query, or
    # the result of source_value(offset) with each newline (and any whitespace
    # following it) collapsed to a single space.
    # 
    def config_value(offset = nil)
      return unique_name unless type == :multi
      
      origin = if include_as_association?
        "field"
      else
        source_value(offset).gsub(/\n\s*/, " ")
      end
      
      "uint #{unique_name} from #{origin}"
    end
    
    # Returns the name this attribute is known by: the alias when one was
    # given, otherwise the name of the column - but only when there is exactly
    # one column. With several columns and no alias the result is nil, so an
    # alias should always be supplied in that case.
    # 
    def unique_name
      return @alias unless @columns.length == 1
      
      @alias || @columns.first.__name
    end
    
    # Returns the attribute's type, working it out (and remembering it) on
    # first use when none was set. The inferred type is :multi when there may
    # be several values, :string when more than one association is involved,
    # and otherwise whatever the database column's type translates to.
    #
    # When the inferred type is :string and CRC was requested, :integer is
    # stored instead; in every other inferred case the CRC flag is switched
    # off.
    # 
    def type
      return @type if @type
      
      inferred = if is_many? || is_many_ints?
        :multi
      elsif @associations.values.flatten.length > 1
        :string
      else
        translated_type_from_database
      end
      
      if inferred == :string && @crc
        @type = :integer
      else
        # CRC is only kept for string values.
        @crc  = false
        @type = inferred
      end
    end
    
    # Wraps this attribute in a ThinkingSphinx::Facet when it was flagged as
    # faceted; returns nil otherwise.
    def to_facet
      ThinkingSphinx::Facet.new(self) if @faceted
    end
    
    private
    
    # Builds the source portion of a multi-value attribute's configuration.
    #
    # When every column is a raw string, the dasherized source name is followed
    # by the first column's string as-is. Otherwise the SQL is generated: a
    # ranged query gets the main query, its range clause and the range lookup;
    # anything else gets the main query alone.
    def source_value(offset)
      return "#{source.to_s.dasherize}; #{columns.first.__name}" if is_string?
      
      if source == :ranged_query
        "ranged-query; #{query offset} #{query_clause}; #{range_query}"
      else
        "query; #{query offset}"
      end
    end
    
    # Builds the SELECT statement used to fetch this multi-value attribute's
    # values separately from the main query. The first selected expression is
    # the foreign key followed by ThinkingSphinx.unique_id_expression(offset),
    # aliased as id; the second is the association's primary key, aliased to
    # the attribute's unique name.
    #
    # Raises a RuntimeError when no association carrying the column is found.
    def query(offset)
      mva = association_for_mva
      raise "Could not determine SQL for MVA" if mva.nil?
      
      # Each entry is one line of the statement; a trailing newline is kept.
      lines = [
        "SELECT #{foreign_key_for_mva(mva)}",
        "  #{ThinkingSphinx.unique_id_expression(offset)} AS #{quote_column('id')},",
        "  #{primary_key_for_mva(mva)} AS #{quote_column(unique_name)}",
        "FROM #{quote_table_name(mva.table)}"
      ]
      lines.join("\n") + "\n"
    end
    
    # WHERE clause restricting the multi-value query to the foreign keys
    # between the $start and $end placeholders (left in the string verbatim).
    def query_clause
      key    = foreign_key_for_mva(association_for_mva)
      bounds = ["#{key} >= $start", "#{key} <= $end"].join(' AND ')
      
      "WHERE #{bounds}"
    end
    
    # SQL selecting the smallest and largest foreign key values in the
    # multi-value association's table.
    def range_query
      mva   = association_for_mva
      key   = foreign_key_for_mva(mva)
      table = quote_table_name(mva.table)
      
      "SELECT MIN(#{key}), MAX(#{key}) FROM #{table}"
    end
    
    # Quoted table.column reference for the value column of the multi-value
    # query: the association's primary_key_from_reflection when it has one,
    # otherwise the name of this attribute's first column.
    def primary_key_for_mva(assoc)
      table = assoc.table
      key   = assoc.primary_key_from_reflection || columns.first.__name
      
      quote_with_table(table, key)
    end
    
    # Quoted table.column reference built from the association's table and
    # its reflection's primary_key_name.
    def foreign_key_for_mva(assoc)
      table = assoc.table
      key   = assoc.reflection.primary_key_name
      
      quote_with_table(table, key)
    end
    
    # Finds, among the associations registered for the first column, the first
    # one that has that column. A found association is remembered for later
    # calls; nil is returned (and looked up again next time) when none match.
    def association_for_mva
      return @association_for_mva if @association_for_mva
      
      @association_for_mva = associations[columns.first].detect do |assoc|
        assoc.has_column?(columns.first.__name)
      end
    end
    
    # The model's Sphinx database adapter, fetched once and then reused.
    def adapter
      return @adapter if @adapter
      
      @adapter = @model.sphinx_database_adapter
    end
    
    # Joins the quoted table name and the quoted column name with a dot.
    def quote_with_table(table, column)
      [quote_table_name(table), quote_column(column)].join('.')
    end
    
    # Quotes a column name using the model's database connection.
    def quote_column(column)
      connection = @model.connection
      connection.quote_column_name(column)
    end
    
    # Quotes a table name using the model's database connection.
    def quote_table_name(table_name)
      connection = @model.connection
      connection.quote_table_name(table_name)
    end
    
    # Whether the column values need to be concatenated into one value. That
    # is the case when some column has multiple associations, or when the
    # attribute is built from more than one column.
    # 
    def concat_ws?
      polymorphic = multiple_associations?
      
      polymorphic || @columns.size > 1
    end
        
    # True when at least one column maps to more than one association (which
    # only happens for polymorphic situations).
    # 
    def multiple_associations?
      associations.values.any? { |assocs| assocs.length > 1 }
    end
    
    # Builds the SQL reference for a column.
    #
    # A raw string column is returned untouched. A column with no associations
    # is prefixed with the model's own quoted table name. Otherwise one
    # reference is produced per association that actually has the column,
    # each prefixed with that association's join alias, and the references are
    # joined with ', '.
    # 
    def column_with_prefix(column)
      return column.__name if column.is_string?
      
      if associations[column].empty?
        return "#{@model.quoted_table_name}.#{quote_column(column.__name)}"
      end
      
      references = []
      associations[column].each do |assoc|
        # Associations without this column contribute nothing.
        next unless assoc.has_column?(column.__name)
        
        table = quote_table_name(assoc.join.aliased_table_name)
        references << "#{table}.#{quote_column(column.__name)}"
      end
      references.join(', ')
    end
    
    # True when any association, across all columns, reports that it can
    # yield more than one value for the parent record.
    # 
    def is_many?
      joined = associations.values.flatten
      
      joined.any? { |assoc| assoc.is_many? }
    end
    
    # True when the values get concatenated (see concat_ws?) and every column
    # involved is an integer column.
    def is_many_ints?
      concatenated = concat_ws?
      
      concatenated && all_ints?
    end
    
    # Returns true only if every column is a string value rather than a
    # database column reference (and therefore also when there are no columns).
    def is_string?
      !columns.any? { |column| !column.is_string? }
    end
    
    # True when every column is an integer column in every class it is read
    # from. A column with no associations is looked up on the model itself;
    # otherwise on the class behind each association's reflection. A column
    # that cannot be found on one of those classes counts as not an integer.
    def all_ints?
      @columns.all? do |col|
        sources = if @associations[col].empty?
          [@model]
        else
          @associations[col].collect { |assoc| assoc.reflection.klass }
        end
        
        sources.all? do |klass|
          match = klass.columns.detect { |db_column| db_column.name == col.__name.to_s }
          !match.nil? && match.type == :integer
        end
      end
    end
    
    # Looks up the database type of the first table column whose name matches
    # one of this attribute's columns. The table searched belongs to the class
    # behind the first association's reflection or, when there are no
    # associations, to the model itself.
    #
    # Returns the column's type, or nil when no column matches - for example
    # when the "column" is really a SQL snippet. Returning nil lets
    # translated_type_from_database raise its descriptive error rather than
    # this method failing with a NoMethodError on nil.
    def type_from_database
      first_assoc = @associations.values.flatten.first
      klass       = first_assoc ? first_assoc.reflection.klass : @model
      
      # Collected once up front instead of once per database column.
      names  = @columns.collect { |c| c.__name.to_s }
      column = klass.columns.detect { |col| names.include?(col.name) }
      
      column.nil? ? nil : column.type
    end
    
    # Translates the database column type into one of the attribute types used
    # by this class: :datetime, :string, :float, :boolean and :integer pass
    # through unchanged, :decimal becomes :float, and :timestamp and :date
    # become :datetime.
    #
    # Raises a RuntimeError suggesting an explicit :type for anything else.
    def translated_type_from_database
      db_type = type_from_database
      
      return db_type   if [:datetime, :string, :float, :boolean, :integer].include?(db_type)
      return :float    if :decimal == db_type
      return :datetime if [:timestamp, :date].include?(db_type)
      
      raise <<-MESSAGE

Cannot automatically map column type #{db_type} to an equivalent Sphinx
type (integer, float, boolean, datetime, string as ordinal). You could try to
explicitly convert the column's value in your define_index block:
  has "CAST(column AS INT)", :type => :integer, :as => :column
      MESSAGE
    end
  end
end