diff --git a/README.md b/README.md index 0a3bad2..e743cf6 100644 --- a/README.md +++ b/README.md @@ -834,7 +834,9 @@ product = Product.first product.similar(fields: ["name"], where: {size: "12 oz"}) ``` -### Geospatial Searches +### Simple Geospatial Indexing: geo_points + +If your data consists of point values, Searchkick offers a useful shorthand: ```ruby class City < ActiveRecord::Base @@ -846,7 +848,9 @@ class City < ActiveRecord::Base end ``` -Reindex and search with: +Elasticsearch supports a range of useful search types for geo_point data: + +Within a radius ```ruby City.search "san", where: {location: {near: {lat: 37, lon: -114}, within: "100mi"}} # or 160km @@ -878,6 +882,81 @@ Also supports [additional options](https://www.elastic.co/guide/en/elasticsearch City.search "san", boost_by_distance: {field: :location, origin: {lat: 37, lon: -122}, function: :linear, scale: "30mi", decay: 0.5} ``` + +### Complex Geospatial Indexing: geo_shapes + +You can also pass through complex or varied shapes as GeoJSON objects. + +```ruby +class City < ActiveRecord::Base + searchkick geo_shapes: { + bounds: {tree: "geohash", precision: "1km"}, + perimeter: {tree: "quadtree", precision: "10m"} + } + + def search_data + attributes.merge( + bounds: { + type: "envelope", + coordinates: [{lat: 4, lon: 1}, {lat: 2, lon: 3}] + }, + perimeter: { + type: "polygon", + coordinates: [[{lat: 1, lon: 2}, {lat: 3, lon: 4}, {lat: 5, lon: 6}, ...]] + } + ) + end +end +``` + +The options given for each field in the `geo_shapes` hash are passed through to Elasticsearch unchanged; Searchkick only adds the `geo_shape` type. Please see the [geo_shape data type documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-shape.html) for options. + +Any geospatial data type can be used in the index or in the search. It is up to you to ensure that it is a valid GeoJSON representation. 
The possible shapes are: + +* **point**: single lat/lon pair +* **multipoint**: array of points +* **linestring**: array of at least two lat/lon pairs +* **multilinestring**: array of lines +* **polygon**: an array of paths, each being an array of at least four lat/lon pairs whose first and last points are the same. Paths after the first represent exclusions. +* **multipolygon**: array of polygons +* **envelope**: a bounding box defined by top left and bottom right points +* **circle**: a bounding circle defined by center point and radius +* **geometrycollection**: an array of separate GeoJSON objects possibly of various types + +See the [Elasticsearch documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/geo-shape.html) for details. GeoJSON coordinates are usually given as an array of `[lon, lat]` points, but this often causes swapping errors, so Searchkick can also take objects with `lon` and `lat` keys. + +Elasticsearch is sensitive about geo_shape validity. For example, it will throw an exception if a polygon contains two consecutive identical points, or is not properly closed. You probably want to validate the data during indexing. + + +### Geospatial searching + +Once a geo_shape index is established, you can apply a geo_shape filter to any search. This also takes a GeoJSON shape, plus an optional `relation` inside the `geo_shape` hash, and will return a list of items based on their overlap with that shape. + +Intersecting with the query shape (the default relation): + +```ruby +City.search "san", where: {bounds: {geo_shape: {type: "polygon", coordinates: [[{lat: 38, lon: -123}, ...]]}}} +``` + +Falling entirely within the query shape: + +```ruby +City.search "san", where: {bounds: {geo_shape: {type: "circle", relation: "within", coordinates: {lat: 38, lon: -123}, radius: "1km"}}} +``` + +Not touching the query shape: + +```ruby +City.search "san", where: {bounds: {geo_shape: {type: "envelope", relation: "disjoint", coordinates: [{lat: 38, lon: -123}, {lat: 37, lon: -122}]}}} +``` + +Envelope is a special case. 
For consistency, Searchkick also understands coordinates given as top_left and bottom_right: + +```ruby +City.search "san", where: {bounds: {geo_shape: {type: "envelope", relation: "within", top_left: {lat: 38, lon: -123}, bottom_right: {lat: 37, lon: -122}}}} +``` + + ### Routing Searchkick supports [Elasticsearch’s routing feature](https://www.elastic.co/blog/customizing-your-document-routing). diff --git a/lib/searchkick/index_options.rb b/lib/searchkick/index_options.rb index c7d86b4..3542994 100644 --- a/lib/searchkick/index_options.rb +++ b/lib/searchkick/index_options.rb @@ -281,6 +281,11 @@ module Searchkick } end + options[:geo_shapes] = options[:geo_shapes].product([{}]).to_h if options[:geo_shapes].is_a? Array + (options[:geo_shapes] || {}).each do |field, shape_options| + mapping[field] = shape_options.merge(type: "geo_shape") + end + (options[:unsearchable] || []).map(&:to_s).each do |field| mapping[field] = { type: default_type, diff --git a/lib/searchkick/query.rb b/lib/searchkick/query.rb index fb87220..14b9c28 100644 --- a/lib/searchkick/query.rb +++ b/lib/searchkick/query.rb @@ -808,6 +808,25 @@ module Searchkick field => op_value } } + when :geo_shape + if op_value[:type] == "envelope" && op_value[:top_left].present? && op_value[:bottom_right].present? 
+ op_value = op_value.merge(coordinates: [coordinate_array(op_value[:top_left]), coordinate_array(op_value[:bottom_right])]) + op_value.delete(:top_left) + op_value.delete(:bottom_right) + else + # coordinate_array recurses through any nesting, so one call converts points, circles, paths and polygons alike. + # As above, merge returns a new hash, so the caller's where clause is never modified by this filter. + op_value = op_value.merge(coordinates: coordinate_array(op_value[:coordinates] || [])) + end + relation = op_value.delete(:relation) || 'intersects' + filters << { + geo_shape: { + field => { + relation: relation, + shape: op_value + } + } + } when :top_left filters << { geo_bounding_box: { @@ -923,6 +942,21 @@ module Searchkick end end + # Recursively descend through nested arrays until we reach either a lat/lon hash or an array of numbers, + # returning the same structure with every lat/lon hash converted to a GeoJSON [lon, lat] pair. + # Arrays of numbers are returned unchanged, so they must already be in GeoJSON [lon, lat] order; they are not + # reversed the way location_value reverses [lat, lon] arrays, because shapes are passed around as GeoJSON. 
+ # + def coordinate_array(value) + if value.is_a?(Hash) + [value[:lon], value[:lat]] + elsif value.is_a?(Array) && !value.first.is_a?(Numeric) + value.map { |a| coordinate_array(a) } + else + value + end + end + def location_value(value) if value.is_a?(Array) value.map(&:to_f).reverse diff --git a/test/geo_shape_test.rb b/test/geo_shape_test.rb new file mode 100644 index 0000000..11c6f5a --- /dev/null +++ b/test/geo_shape_test.rb @@ -0,0 +1,143 @@ +require "pp" +require_relative "test_helper" + +class GeoShapeTest < Minitest::Test + + def test_geo_shape + regions = [ + {name: "Region A", text: "The witch had a cat", territory: "30,40,35,45,40,40,40,30,30,30,30,40"}, + {name: "Region B", text: "and a very tall hat", territory: "50,60,55,65,60,60,60,50,50,50,50,60"}, + {name: "Region C", text: "and long ginger hair which she wore in a plait.", territory: "10,20,15,25,20,20,20,10,10,10,10,20"}, + ] + store regions, Region + + # circle + assert_search "*", ["Region A"], { + where: { + territory: { + geo_shape: { + type: "circle", + coordinates: {lat: 28.0, lon: 38.0}, + radius: "444000m" + } + } + } + }, Region + + # envelope + assert_search "*", ["Region A"], { + where: { + territory: { + geo_shape: { + type: "envelope", + coordinates: [[28, 42], [32, 38]] + } + } + } + }, Region + + # envelope as corners + assert_search "*", ["Region A"], { + where: { + territory: { + geo_shape: { + type: "envelope", + top_left: {lat: 42.0, lon: 28.0}, + bottom_right: {lat: 38.0, lon: 32.0} + } + } + } + }, Region + + # polygon + assert_search "*", ["Region A"], { + where: { + territory: { + geo_shape: { + type: "polygon", + coordinates: [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]] + } + } + } + }, Region + + # multipolygon + assert_search "*", ["Region A", "Region B"], { + where: { + territory: { + geo_shape: { + type: "multipolygon", + coordinates: [ + [[[38, 42], [42, 42], [42, 38], [38, 38], [38, 42]]], + [[[58, 62], [62, 62], [62, 58], [58, 58], [58, 62]]] + ] + } + } + } 
+ }, Region + + # disjoint: only regions sharing no area with the query envelope should match + assert_search "*", ["Region B", "Region C"], { + where: { + territory: { + geo_shape: { + type: "envelope", + relation: "disjoint", + coordinates: [[28, 42], [32, 38]] + } + } + } + }, Region + + # within: only regions lying entirely inside the query envelope should match + assert_search "*", ["Region A"], { + where: { + territory: { + geo_shape: { + type: "envelope", + relation: "within", + coordinates: [[20, 50], [50, 20]] + } + } + } + }, Region + + # contains: only regions that fully enclose the query envelope should match + assert_search "*", ["Region A"], { + where: { + territory: { + geo_shape: { + type: "envelope", + relation: "contains", + coordinates: [[32, 33], [33, 32]] + } + } + } + }, Region + + # with search: the text query and the geo_shape filter must both match + assert_search "witch", ["Region A"], { + where: { + territory: { + geo_shape: { + type: "envelope", + coordinates: [[28, 42], [32, 38]] + } + } + } + }, Region + + assert_search "ginger hair", [], { + where: { + territory: { + geo_shape: { + type: "envelope", + coordinates: [[28, 42], [32, 38]] + } + } + } + }, Region + + end + +end diff --git a/test/test_helper.rb b/test/test_helper.rb index 210c81c..da55df2 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -93,6 +93,14 @@ if defined?(Mongoid) field :name end + class Region + include Mongoid::Document + + field :name + field :text + field :territory + end + class Speaker include Mongoid::Document @@ -143,6 +151,15 @@ elsif defined?(NoBrainer) field :name, type: String end + class Region + include NoBrainer::Document + + field :id, type: Object + field :name, type: String + field :text, type: Text + field :territory, type: Text + end + class Speaker include NoBrainer::Document @@ -234,6 +251,12 @@ else t.string :name end + ActiveRecord::Migration.create_table :regions do |t| + t.string :name + t.text :text + t.text :territory + end + ActiveRecord::Migration.create_table :speakers do |t| t.string :name end @@ -250,6 +273,9 @@ else has_many :products end + class Region < ActiveRecord::Base + end + class Speaker < ActiveRecord::Base end @@ -338,6 +364,33 @@ class Store end end +class 
Region + searchkick \ + geo_shapes: { + territory: {tree: "quadtree", precision: "10km"} + } + + def search_data + { + name: name, + text: text, + territory: as_geo_json + } + end + + def as_geo_json + { + type: "polygon", + coordinates: [territory_path] # enclosing array because polygon can also have exclusion paths. + } + end + + def territory_path + # Turns the "lon,lat,lon,lat,..." string into [[lon, lat], ...] pairs, the GeoJSON coordinate order. + territory.split(',').map(&:to_f).each_slice(2).to_a + end +end + class Speaker searchkick \ conversions: ["conversions_a", "conversions_b"] @@ -369,6 +422,9 @@ Store.reindex Animal.reindex Speaker.reindex +Region.searchkick_index.delete if Region.searchkick_index.exists? +Region.reindex + class Minitest::Test def setup Product.destroy_all -- libgit2 0.21.0