diff options
| author | hukl <contact@smyck.org> | 2009-04-28 00:15:53 +0200 |
|---|---|---|
| committer | hukl <contact@smyck.org> | 2009-05-01 17:14:02 +0200 |
| commit | 4bd16f053847f2efe347ebda9136ef2233ee0d2c (patch) | |
| tree | f4c11f89455de991c8d87726d5757b752e7129e2 /vendor/plugins/thinking-sphinx/lib/thinking_sphinx/attribute.rb | |
| parent | d3a9b46ba5c863a0ff377dcffae9a494fe476e02 (diff) | |
added thinking_sphinx plugin for fulltext search on nodes and heads
Diffstat (limited to 'vendor/plugins/thinking-sphinx/lib/thinking_sphinx/attribute.rb')
| -rw-r--r-- | vendor/plugins/thinking-sphinx/lib/thinking_sphinx/attribute.rb | 358 |
1 files changed, 358 insertions, 0 deletions
diff --git a/vendor/plugins/thinking-sphinx/lib/thinking_sphinx/attribute.rb b/vendor/plugins/thinking-sphinx/lib/thinking_sphinx/attribute.rb new file mode 100644 index 0000000..1d45b2e --- /dev/null +++ b/vendor/plugins/thinking-sphinx/lib/thinking_sphinx/attribute.rb | |||
| @@ -0,0 +1,358 @@ | |||
| 1 | module ThinkingSphinx | ||
| 2 | # Attributes - eternally useful when it comes to filtering, sorting or | ||
| 3 | # grouping. This class isn't really useful to you unless you're hacking | ||
| 4 | # around with the internals of Thinking Sphinx - but hey, don't let that | ||
| 5 | # stop you. | ||
| 6 | # | ||
| 7 | # One key thing to remember - if you're using the attribute manually to | ||
| 8 | # generate SQL statements, you'll need to set the base model, and all the | ||
| 9 | # associations. Which can get messy. Use Index.link!, it really helps. | ||
| 10 | # | ||
| 11 | class Attribute | ||
| 12 | attr_accessor :alias, :columns, :associations, :model, :faceted, :source | ||
| 13 | |||
| 14 | # To create a new attribute, you'll need to pass in either a single Column | ||
| 15 | # or an array of them, and some (optional) options. | ||
| 16 | # | ||
| 17 | # Valid options are: | ||
| 18 | # - :as => :alias_name | ||
| 19 | # - :type => :attribute_type | ||
| 20 | # - :source => :field, :query, :ranged_query | ||
| 21 | # | ||
| 22 | # Alias is only required in three circumstances: when there's | ||
| 23 | # another attribute or field with the same name, when the column name is | ||
| 24 | # 'id', or when there's more than one column. | ||
| 25 | # | ||
| 26 | # Type is not required, unless you want to force a column to be a certain | ||
| 27 | # type (but keep in mind the value will not be CASTed in the SQL | ||
| 28 | # statements). The only time you really need to use this is when the type | ||
| 29 | # can't be figured out by the column - ie: when not actually using a | ||
| 30 | # database column as your source. | ||
| 31 | # | ||
| 32 | # Source is only used for multi-value attributes (MVA). By default this will | ||
| 33 | # use a left-join and a group_concat to obtain the values. For better performance | ||
| 34 | # during indexing it can be beneficial to let Sphinx use a separate query to retrieve | ||
| 35 | # all (document, value) pairs. | ||
| 36 | # Either :query or :ranged_query will enable this feature, where :ranged_query will cause | ||
| 37 | # the query to be executed incrementally. | ||
| 38 | # | ||
| 39 | # Example usage: | ||
| 40 | # | ||
| 41 | # Attribute.new( | ||
| 42 | # Column.new(:created_at) | ||
| 43 | # ) | ||
| 44 | # | ||
| 45 | # Attribute.new( | ||
| 46 | # Column.new(:posts, :id), | ||
| 47 | # :as => :post_ids | ||
| 48 | # ) | ||
| 49 | # | ||
| 50 | # Attribute.new( | ||
| 51 | # Column.new(:posts, :id), | ||
| 52 | # :as => :post_ids, | ||
| 53 | # :source => :ranged_query | ||
| 54 | # ) | ||
| 55 | # | ||
| 56 | # Attribute.new( | ||
| 57 | # [Column.new(:pages, :id), Column.new(:articles, :id)], | ||
| 58 | # :as => :content_ids | ||
| 59 | # ) | ||
| 60 | # | ||
| 61 | # Attribute.new( | ||
| 62 | # Column.new("NOW()"), | ||
| 63 | # :as => :indexed_at, | ||
| 64 | # :type => :datetime | ||
| 65 | # ) | ||
| 66 | # | ||
| 67 | # If you're creating attributes for latitude and longitude, don't forget | ||
| 68 | # that Sphinx expects these values to be in radians. | ||
| 69 | # | ||
| 70 | def initialize(columns, options = {}) | ||
| 71 | @columns = Array(columns) | ||
| 72 | @associations = {} | ||
| 73 | |||
| 74 | raise "Cannot define a field with no columns. Maybe you are trying to index a field with a reserved name (id, name). You can fix this error by using a symbol rather than a bare name (:id instead of id)." if @columns.empty? || @columns.any? { |column| !column.respond_to?(:__stack) } | ||
| 75 | |||
| 76 | @alias = options[:as] | ||
| 77 | @type = options[:type] | ||
| 78 | @faceted = options[:facet] | ||
| 79 | @source = options[:source] | ||
| 80 | @crc = options[:crc] | ||
| 81 | |||
| 82 | @type ||= :multi unless @source.nil? | ||
| 83 | @type = :integer if @type == :string && @crc | ||
| 84 | end | ||
| 85 | |||
| 86 | # Get the part of the SELECT clause related to this attribute. Don't forget | ||
| 87 | # to set your model and associations first though. | ||
| 88 | # | ||
| 89 | # This will concatenate strings and arrays of integers, and convert | ||
| 90 | # datetimes to timestamps, as needed. | ||
| 91 | # | ||
| 92 | def to_select_sql | ||
| 93 | return nil unless include_as_association? | ||
| 94 | |||
| 95 | clause = @columns.collect { |column| | ||
| 96 | column_with_prefix(column) | ||
| 97 | }.join(', ') | ||
| 98 | |||
| 99 | separator = all_ints? ? ',' : ' ' | ||
| 100 | |||
| 101 | clause = adapter.concatenate(clause, separator) if concat_ws? | ||
| 102 | clause = adapter.group_concatenate(clause, separator) if is_many? | ||
| 103 | clause = adapter.cast_to_datetime(clause) if type == :datetime | ||
| 104 | clause = adapter.convert_nulls(clause) if type == :string | ||
| 105 | clause = adapter.crc(clause) if @crc | ||
| 106 | |||
| 107 | "#{clause} AS #{quote_column(unique_name)}" | ||
| 108 | end | ||
| 109 | |||
| 110 | # Get the part of the GROUP BY clause related to this attribute - if one is | ||
| 111 | # needed. If not, all you'll get back is nil. The latter will happen if | ||
| 112 | # there isn't actually a real column to get data from, or if there are | ||
| 113 | # multiple data values (read: a has_many or has_and_belongs_to_many | ||
| 114 | # association). | ||
| 115 | # | ||
| 116 | def to_group_sql | ||
| 117 | case | ||
| 118 | when is_many?, is_string?, ThinkingSphinx.use_group_by_shortcut? | ||
| 119 | nil | ||
| 120 | else | ||
| 121 | @columns.collect { |column| | ||
| 122 | column_with_prefix(column) | ||
| 123 | } | ||
| 124 | end | ||
| 125 | end | ||
| 126 | |||
| 127 | def type_to_config | ||
| 128 | { | ||
| 129 | :multi => :sql_attr_multi, | ||
| 130 | :datetime => :sql_attr_timestamp, | ||
| 131 | :string => :sql_attr_str2ordinal, | ||
| 132 | :float => :sql_attr_float, | ||
| 133 | :boolean => :sql_attr_bool, | ||
| 134 | :integer => :sql_attr_uint | ||
| 135 | }[type] | ||
| 136 | end | ||
| 137 | |||
| 138 | def include_as_association? | ||
| 139 | ! (type == :multi && (source == :query || source == :ranged_query)) | ||
| 140 | end | ||
| 141 | |||
| 142 | # Returns the configuration value that should be used for | ||
| 143 | # the attribute. | ||
| 144 | # Special case is the multi-valued attribute that needs some | ||
| 145 | # extra configuration. | ||
| 146 | # | ||
| 147 | def config_value(offset = nil) | ||
| 148 | if type == :multi | ||
| 149 | multi_config = include_as_association? ? "field" : | ||
| 150 | source_value(offset).gsub(/\n\s*/, " ") | ||
| 151 | "uint #{unique_name} from #{multi_config}" | ||
| 152 | else | ||
| 153 | unique_name | ||
| 154 | end | ||
| 155 | end | ||
| 156 | |||
| 157 | # Returns the unique name of the attribute - which is either the alias of | ||
| 158 | # the attribute, or the name of the only column - if there is only one. If | ||
| 159 | # there isn't, there should be an alias. Else things probably won't work. | ||
| 160 | # Consider yourself warned. | ||
| 161 | # | ||
| 162 | def unique_name | ||
| 163 | if @columns.length == 1 | ||
| 164 | @alias || @columns.first.__name | ||
| 165 | else | ||
| 166 | @alias | ||
| 167 | end | ||
| 168 | end | ||
| 169 | |||
| 170 | # Returns the type of the column. If that's not already set, it returns | ||
| 171 | # :multi if there's the possibility of more than one value, :string if | ||
| 172 | # there's more than one association, otherwise it figures out what the | ||
| 173 | # actual column's datatype is and returns that. | ||
| 174 | # | ||
| 175 | def type | ||
| 176 | @type ||= begin | ||
| 177 | base_type = case | ||
| 178 | when is_many?, is_many_ints? | ||
| 179 | :multi | ||
| 180 | when @associations.values.flatten.length > 1 | ||
| 181 | :string | ||
| 182 | else | ||
| 183 | translated_type_from_database | ||
| 184 | end | ||
| 185 | |||
| 186 | if base_type == :string && @crc | ||
| 187 | :integer | ||
| 188 | else | ||
| 189 | @crc = false | ||
| 190 | base_type | ||
| 191 | end | ||
| 192 | end | ||
| 193 | end | ||
| 194 | |||
| 195 | def to_facet | ||
| 196 | return nil unless @faceted | ||
| 197 | |||
| 198 | ThinkingSphinx::Facet.new(self) | ||
| 199 | end | ||
| 200 | |||
| 201 | private | ||
| 202 | |||
| 203 | def source_value(offset) | ||
| 204 | if is_string? | ||
| 205 | "#{source.to_s.dasherize}; #{columns.first.__name}" | ||
| 206 | elsif source == :ranged_query | ||
| 207 | "ranged-query; #{query offset} #{query_clause}; #{range_query}" | ||
| 208 | else | ||
| 209 | "query; #{query offset}" | ||
| 210 | end | ||
| 211 | end | ||
| 212 | |||
| 213 | def query(offset) | ||
| 214 | assoc = association_for_mva | ||
| 215 | raise "Could not determine SQL for MVA" if assoc.nil? | ||
| 216 | |||
| 217 | <<-SQL | ||
| 218 | SELECT #{foreign_key_for_mva assoc} | ||
| 219 | #{ThinkingSphinx.unique_id_expression(offset)} AS #{quote_column('id')}, | ||
| 220 | #{primary_key_for_mva(assoc)} AS #{quote_column(unique_name)} | ||
| 221 | FROM #{quote_table_name assoc.table} | ||
| 222 | SQL | ||
| 223 | end | ||
| 224 | |||
| 225 | def query_clause | ||
| 226 | foreign_key = foreign_key_for_mva association_for_mva | ||
| 227 | "WHERE #{foreign_key} >= $start AND #{foreign_key} <= $end" | ||
| 228 | end | ||
| 229 | |||
| 230 | def range_query | ||
| 231 | assoc = association_for_mva | ||
| 232 | foreign_key = foreign_key_for_mva assoc | ||
| 233 | "SELECT MIN(#{foreign_key}), MAX(#{foreign_key}) FROM #{quote_table_name assoc.table}" | ||
| 234 | end | ||
| 235 | |||
| 236 | def primary_key_for_mva(assoc) | ||
| 237 | quote_with_table( | ||
| 238 | assoc.table, assoc.primary_key_from_reflection || columns.first.__name | ||
| 239 | ) | ||
| 240 | end | ||
| 241 | |||
| 242 | def foreign_key_for_mva(assoc) | ||
| 243 | quote_with_table assoc.table, assoc.reflection.primary_key_name | ||
| 244 | end | ||
| 245 | |||
| 246 | def association_for_mva | ||
| 247 | @association_for_mva ||= associations[columns.first].detect { |assoc| | ||
| 248 | assoc.has_column?(columns.first.__name) | ||
| 249 | } | ||
| 250 | end | ||
| 251 | |||
| 252 | def adapter | ||
| 253 | @adapter ||= @model.sphinx_database_adapter | ||
| 254 | end | ||
| 255 | |||
| 256 | def quote_with_table(table, column) | ||
| 257 | "#{quote_table_name(table)}.#{quote_column(column)}" | ||
| 258 | end | ||
| 259 | |||
| 260 | def quote_column(column) | ||
| 261 | @model.connection.quote_column_name(column) | ||
| 262 | end | ||
| 263 | |||
| 264 | def quote_table_name(table_name) | ||
| 265 | @model.connection.quote_table_name(table_name) | ||
| 266 | end | ||
| 267 | |||
| 268 | # Indication of whether the columns should be concatenated with a space | ||
| 269 | # between each value. True if there are either multiple columns or multiple | ||
| 270 | # associations. | ||
| 271 | # | ||
| 272 | def concat_ws? | ||
| 273 | multiple_associations? || @columns.length > 1 | ||
| 274 | end | ||
| 275 | |||
| 276 | # Checks whether any column requires multiple associations (which only | ||
| 277 | # happens for polymorphic situations). | ||
| 278 | # | ||
| 279 | def multiple_associations? | ||
| 280 | associations.any? { |col,assocs| assocs.length > 1 } | ||
| 281 | end | ||
| 282 | |||
| 283 | # Builds a column reference tied to the appropriate associations. This | ||
| 284 | # dives into the associations hash and their corresponding joins to | ||
| 285 | # figure out how to correctly reference a column in SQL. | ||
| 286 | # | ||
| 287 | def column_with_prefix(column) | ||
| 288 | if column.is_string? | ||
| 289 | column.__name | ||
| 290 | elsif associations[column].empty? | ||
| 291 | "#{@model.quoted_table_name}.#{quote_column(column.__name)}" | ||
| 292 | else | ||
| 293 | associations[column].collect { |assoc| | ||
| 294 | assoc.has_column?(column.__name) ? | ||
| 295 | "#{quote_table_name(assoc.join.aliased_table_name)}" + | ||
| 296 | ".#{quote_column(column.__name)}" : | ||
| 297 | nil | ||
| 298 | }.compact.join(', ') | ||
| 299 | end | ||
| 300 | end | ||
| 301 | |||
| 302 | # Could there be more than one value related to the parent record? If so, | ||
| 303 | # then this will return true. If not, false. It's that simple. | ||
| 304 | # | ||
| 305 | def is_many? | ||
| 306 | associations.values.flatten.any? { |assoc| assoc.is_many? } | ||
| 307 | end | ||
| 308 | |||
| 309 | def is_many_ints? | ||
| 310 | concat_ws? && all_ints? | ||
| 311 | end | ||
| 312 | |||
| 313 | # Returns true if all of the columns are string values, instead of database | ||
| 314 | # column references. | ||
| 315 | def is_string? | ||
| 316 | columns.all? { |col| col.is_string? } | ||
| 317 | end | ||
| 318 | |||
| 319 | def all_ints? | ||
| 320 | @columns.all? { |col| | ||
| 321 | klasses = @associations[col].empty? ? [@model] : | ||
| 322 | @associations[col].collect { |assoc| assoc.reflection.klass } | ||
| 323 | klasses.all? { |klass| | ||
| 324 | column = klass.columns.detect { |column| column.name == col.__name.to_s } | ||
| 325 | !column.nil? && column.type == :integer | ||
| 326 | } | ||
| 327 | } | ||
| 328 | end | ||
| 329 | |||
| 330 | def type_from_database | ||
| 331 | klass = @associations.values.flatten.first ? | ||
| 332 | @associations.values.flatten.first.reflection.klass : @model | ||
| 333 | |||
| 334 | klass.columns.detect { |col| | ||
| 335 | @columns.collect { |c| c.__name.to_s }.include? col.name | ||
| 336 | }.type | ||
| 337 | end | ||
| 338 | |||
| 339 | def translated_type_from_database | ||
| 340 | case type_from_db = type_from_database | ||
| 341 | when :datetime, :string, :float, :boolean, :integer | ||
| 342 | type_from_db | ||
| 343 | when :decimal | ||
| 344 | :float | ||
| 345 | when :timestamp, :date | ||
| 346 | :datetime | ||
| 347 | else | ||
| 348 | raise <<-MESSAGE | ||
| 349 | |||
| 350 | Cannot automatically map column type #{type_from_db} to an equivalent Sphinx | ||
| 351 | type (integer, float, boolean, datetime, string as ordinal). You could try to | ||
| 352 | explicitly convert the column's value in your define_index block: | ||
| 353 | has "CAST(column AS INT)", :type => :integer, :as => :column | ||
| 354 | MESSAGE | ||
| 355 | end | ||
| 356 | end | ||
| 357 | end | ||
| 358 | end \ No newline at end of file | ||
