Commit 00cf42bca0d521f4e9c0d2b8ee8f67cfe79ad89b
1 parent: 0984bc9a
Exists in master and in 1 other branch
Moved performant conversions section [skip ci]
Showing 1 changed file with 87 additions and 89 deletions
Show diff stats
README.md
@@ -679,7 +679,93 @@ end | @@ -679,7 +679,93 @@ end | ||
679 | 679 | ||
680 | Reindex and set up a cron job to add new conversions daily. For zero downtime deployment, temporarily set `conversions: false` in your search calls until the data is reindexed. | 680 | Reindex and set up a cron job to add new conversions daily. For zero downtime deployment, temporarily set `conversions: false` in your search calls until the data is reindexed. |
681 | 681 | ||
682 | -For a performant way to reindex conversion data, check out [performant conversions](#performant-conversions). | 682 | +### Performant Conversions |
683 | + | ||
684 | +A performant way to do conversions is to cache them to prevent N+1 queries. For Postgres, create a migration with: | ||
685 | + | ||
686 | +```ruby | ||
687 | +add_column :products, :search_conversions, :jsonb | ||
688 | +``` | ||
689 | + | ||
690 | +For MySQL, use `:json`, and for others, use `:text` with a [JSON serializer](https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html). | ||
691 | + | ||
692 | +Next, update your model. Create a separate method for conversion data so you can use [partial reindexing](#partial-reindexing). | ||
693 | + | ||
694 | +```ruby | ||
695 | +class Product < ApplicationRecord | ||
696 | + searchkick conversions: [:conversions] | ||
697 | + | ||
698 | + def search_data | ||
699 | + { | ||
700 | + name: name, | ||
701 | + category: category | ||
702 | + }.merge(conversions_data) | ||
703 | + end | ||
704 | + | ||
705 | + def conversions_data | ||
706 | + { | ||
707 | + conversions: search_conversions || {} | ||
708 | + } | ||
709 | + end | ||
710 | +end | ||
711 | +``` | ||
712 | + | ||
713 | +Deploy and reindex your data. For zero downtime deployment, temporarily set `conversions: false` in your search calls until the data is reindexed. | ||
714 | + | ||
715 | +```ruby | ||
716 | +Product.reindex | ||
717 | +``` | ||
718 | + | ||
719 | +Then, create a job to update the conversions column and reindex records with new conversions. Here’s one you can use for Searchjoy: | ||
720 | + | ||
721 | +```ruby | ||
722 | +class ReindexConversionsJob < ApplicationJob | ||
723 | + def perform(class_name, since: nil, reindex: true) | ||
724 | + # get records that have a recent conversion | ||
725 | + recently_converted_ids = | ||
726 | + Searchjoy::Conversion.where(convertable_type: class_name).where(created_at: since..) | ||
727 | + .order(:convertable_id).distinct.pluck(:convertable_id) | ||
728 | + | ||
729 | + # split into batches | ||
730 | + recently_converted_ids.in_groups_of(1000, false) do |ids| | ||
731 | + # fetch conversions | ||
732 | + conversions = | ||
733 | + Searchjoy::Conversion.where(convertable_id: ids, convertable_type: class_name) | ||
734 | + .joins(:search).where.not(searchjoy_searches: {user_id: nil}) | ||
735 | + .group(:convertable_id, :query).distinct.count(:user_id) | ||
736 | + | ||
737 | + # group by record | ||
738 | + conversions_by_record = {} | ||
739 | + conversions.each do |(id, query), count| | ||
740 | + (conversions_by_record[id] ||= {})[query] = count | ||
741 | + end | ||
742 | + | ||
743 | + # update conversions column | ||
744 | + model = Searchkick.load_model(class_name) | ||
745 | + model.transaction do | ||
746 | + conversions_by_record.each do |id, conversions| | ||
747 | + model.where(id: id).update_all(search_conversions: conversions) | ||
748 | + end | ||
749 | + end | ||
750 | + | ||
751 | + # reindex conversions data | ||
752 | + model.where(id: ids).reindex(:conversions_data) if reindex | ||
753 | + end | ||
754 | + end | ||
755 | +end | ||
756 | +``` | ||
757 | + | ||
758 | +Run the job: | ||
759 | + | ||
760 | +```ruby | ||
761 | +ReindexConversionsJob.perform_now("Product") | ||
762 | +``` | ||
763 | + | ||
764 | +And set it up to run daily. | ||
765 | + | ||
766 | +```ruby | ||
767 | +ReindexConversionsJob.perform_later("Product", since: 1.day.ago) | ||
768 | +``` | ||
683 | 769 | ||
684 | ## Personalized Results | 770 | ## Personalized Results |
685 | 771 | ||
@@ -1585,94 +1671,6 @@ And use: | @@ -1585,94 +1671,6 @@ And use: | ||
1585 | Product.reindex(:prices_data) | 1671 | Product.reindex(:prices_data) |
1586 | ``` | 1672 | ``` |
1587 | 1673 | ||
1588 | -### Performant Conversions | ||
1589 | - | ||
1590 | -Cache conversions to prevent N+1 queries. For Postgres, create a migration with: | ||
1591 | - | ||
1592 | -```ruby | ||
1593 | -add_column :products, :search_conversions, :jsonb | ||
1594 | -``` | ||
1595 | - | ||
1596 | -For MySQL, use `:json`, and for others, use `:text` with a [JSON serializer](https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html). | ||
1597 | - | ||
1598 | -Next, update your model. Create a separate method for conversion data so you can use [partial reindexing](#partial-reindexing). | ||
1599 | - | ||
1600 | -```ruby | ||
1601 | -class Product < ApplicationRecord | ||
1602 | - searchkick conversions: [:conversions] | ||
1603 | - | ||
1604 | - def search_data | ||
1605 | - { | ||
1606 | - name: name, | ||
1607 | - category: category | ||
1608 | - }.merge(conversions_data) | ||
1609 | - end | ||
1610 | - | ||
1611 | - def conversions_data | ||
1612 | - { | ||
1613 | - conversions: search_conversions || {} | ||
1614 | - } | ||
1615 | - end | ||
1616 | -end | ||
1617 | -``` | ||
1618 | - | ||
1619 | -Deploy and reindex your data. For zero downtime deployment, temporarily set `conversions: false` in your search calls until the data is reindexed. | ||
1620 | - | ||
1621 | -```ruby | ||
1622 | -Product.reindex | ||
1623 | -``` | ||
1624 | - | ||
1625 | -Then, create a job to update the conversions column and reindex records with new conversions. Here’s one you can use for Searchjoy: | ||
1626 | - | ||
1627 | -```ruby | ||
1628 | -class ReindexConversionsJob < ApplicationJob | ||
1629 | - def perform(class_name, since: nil, reindex: true) | ||
1630 | - # get records that have a recent conversion | ||
1631 | - recently_converted_ids = | ||
1632 | - Searchjoy::Conversion.where(convertable_type: class_name).where(created_at: since..) | ||
1633 | - .order(:convertable_id).distinct.pluck(:convertable_id) | ||
1634 | - | ||
1635 | - # split into batches | ||
1636 | - recently_converted_ids.in_groups_of(1000, false) do |ids| | ||
1637 | - # fetch conversions | ||
1638 | - conversions = | ||
1639 | - Searchjoy::Conversion.where(convertable_id: ids, convertable_type: class_name) | ||
1640 | - .joins(:search).where.not(searchjoy_searches: {user_id: nil}) | ||
1641 | - .group(:convertable_id, :query).distinct.count(:user_id) | ||
1642 | - | ||
1643 | - # group by record | ||
1644 | - conversions_by_record = {} | ||
1645 | - conversions.each do |(id, query), count| | ||
1646 | - (conversions_by_record[id] ||= {})[query] = count | ||
1647 | - end | ||
1648 | - | ||
1649 | - # update conversions column | ||
1650 | - model = Searchkick.load_model(class_name) | ||
1651 | - model.transaction do | ||
1652 | - conversions_by_record.each do |id, conversions| | ||
1653 | - model.where(id: id).update_all(search_conversions: conversions) | ||
1654 | - end | ||
1655 | - end | ||
1656 | - | ||
1657 | - # reindex conversions data | ||
1658 | - model.where(id: ids).reindex(:conversions_data) if reindex | ||
1659 | - end | ||
1660 | - end | ||
1661 | -end | ||
1662 | -``` | ||
1663 | - | ||
1664 | -Run the job: | ||
1665 | - | ||
1666 | -```ruby | ||
1667 | -ReindexConversionsJob.perform_now("Product") | ||
1668 | -``` | ||
1669 | - | ||
1670 | -And set it up to run daily. | ||
1671 | - | ||
1672 | -```ruby | ||
1673 | -ReindexConversionsJob.perform_later("Product", since: 1.day.ago) | ||
1674 | -``` | ||
1675 | - | ||
1676 | ## Advanced | 1674 | ## Advanced |
1677 | 1675 | ||
1678 | Searchkick makes it easy to use the Elasticsearch or OpenSearch DSL on its own. | 1676 | Searchkick makes it easy to use the Elasticsearch or OpenSearch DSL on its own. |