進階查詢

本頁的進階查詢適用於 Google Analytics 的 BigQuery 事件匯出資料。如需較簡單的範例,請參閱「基本查詢」頁面。

購買特定產品的顧客同時購買的產品

以下查詢顯示購買特定產品的顧客還購買了哪些其他產品。這個範例並未假設產品是在同一筆訂單中購買。

最佳化範例會使用 BigQuery 指令碼功能定義變數,宣告要篩選的項目。雖然這不會提升效能,但與使用 WITH 子句建立單一值資料表相比,以變數定義的方式更容易閱讀。簡化範例則採用後者,也就是以 WITH 子句建立單一值資料表。

簡化後的查詢會建立「產品 A 買家」的獨立清單,並與該資料執行聯結。最佳化查詢會改用 ARRAY_AGG 函式,建立使用者在所有訂單中購買的項目清單。接著,查詢會使用外部 WHERE 子句,針對所有使用者的購買清單套用 target_item 篩選器,只顯示相關項目。

簡化

-- Simplified example: other products bought by customers who bought a given
-- product. Purchases are matched per user, not per order.
--
-- The single-row `Params` CTE carries the selected product so the value is
-- written once and cross-joined wherever it is needed.

WITH
  Params AS (
    -- Replace with the item_name (or item_id) of the product of interest.
    SELECT 'Google Navy Speckled Tee' AS selected_product
  ),
  -- Every purchase event in the date range, with its array of purchased items.
  PurchaseEvents AS (
    SELECT
      user_pseudo_id,
      items
    FROM
      -- Replace table name.
      `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
    WHERE
      event_name = 'purchase'
      -- Replace date range.
      AND _TABLE_SUFFIX BETWEEN '20201101' AND '20210131'
  ),
  -- Distinct users who purchased the selected product at least once.
  ProductABuyers AS (
    SELECT DISTINCT
      purchase.user_pseudo_id
    FROM
      PurchaseEvents AS purchase
    CROSS JOIN
      UNNEST(purchase.items) AS item
    CROSS JOIN
      Params
    WHERE
      -- item.item_id can be compared here instead of item.item_name.
      item.item_name = Params.selected_product
  )
SELECT
  item.item_name AS item_name,
  SUM(item.quantity) AS item_quantity
FROM
  PurchaseEvents AS purchase
CROSS JOIN
  UNNEST(purchase.items) AS item
CROSS JOIN
  Params
WHERE
  -- Report only the other products, not the selected product itself.
  -- item.item_id can be compared here instead of item.item_name.
  item.item_name != Params.selected_product
  AND purchase.user_pseudo_id IN (SELECT user_pseudo_id FROM ProductABuyers)
GROUP BY
  item.item_name
ORDER BY
  item_quantity DESC;

最佳化

-- Optimized example: other products bought by customers who bought a given
-- product. The product of interest is held in a script variable, and each
-- user's purchases are collected into one array that is filtered afterwards.

-- Replace item name.
DECLARE target_item STRING DEFAULT 'Google Navy Speckled Tee';

WITH
  -- One row per user, holding every (item_name, quantity) pair the user
  -- purchased in the date range across all purchase events.
  UserPurchases AS (
    SELECT
      events.user_pseudo_id,
      ARRAY_AGG(STRUCT(item.item_name AS item_name, item.quantity AS quantity)) AS item_list
    FROM
      -- Replace table.
      `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*` AS events
    CROSS JOIN
      UNNEST(events.items) AS item
    WHERE
      events.event_name = 'purchase'
      -- Replace date range.
      AND _TABLE_SUFFIX BETWEEN '20201201' AND '20201210'
    GROUP BY
      events.user_pseudo_id
  )
SELECT
  IL.item_name AS item_name,
  SUM(IL.quantity) AS quantity
FROM
  UserPurchases
CROSS JOIN
  UNNEST(UserPurchases.item_list) AS IL
WHERE
  -- Keep only users whose purchase list contains the target item.
  target_item IN (
    SELECT bought.item_name
    FROM UNNEST(UserPurchases.item_list) AS bought
  )
  -- Remove the following line if you want the target_item to appear in the results.
  AND IL.item_name != target_item
GROUP BY
  IL.item_name
ORDER BY
  quantity DESC;

每筆交易工作階段的平均支出

下列查詢會計算單次工作階段的平均花費金額,但只會考量使用者完成購物交易的工作階段。這兩項查詢都會使用通用資料表運算式 (CTE),先計算每個不重複購買工作階段的總支出。

1. 每位使用者在購買工作階段的平均花費金額:

這項查詢會顯示每位使用者的平均工作階段支出:

-- Average spend per purchase session, reported for each user.
WITH
  -- Total purchase value for every (user, session) pair. Only purchase events
  -- that carry a non-NULL integer ga_session_id parameter are counted.
  session_spend AS (
    SELECT
      user_pseudo_id,
      (
        SELECT param.value.int_value
        FROM UNNEST(event_params) AS param
        WHERE param.key = 'ga_session_id'
      ) AS session_id,
      SUM(
        COALESCE(
          (
            -- The 'value' parameter may be stored as int, float or double;
            -- take whichever representation is populated, in that order.
            SELECT
              COALESCE(
                param.value.int_value,
                param.value.float_value,
                param.value.double_value)
            FROM UNNEST(event_params) AS param
            WHERE param.key = 'value'
          ),
          -- Events without a usable 'value' parameter contribute nothing.
          0.0)
      ) AS total_session_spend
    FROM
      -- Replace table name.
      `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
    WHERE
      -- Replace date range.
      _TABLE_SUFFIX BETWEEN '20201101' AND '20210131'
      AND event_name = 'purchase'
      AND EXISTS(
        SELECT 1
        FROM UNNEST(event_params) AS param
        WHERE param.key = 'ga_session_id'
          AND param.value.int_value IS NOT NULL
      )
    GROUP BY
      user_pseudo_id,
      session_id
  )
SELECT
  spend.user_pseudo_id,
  COUNT(spend.session_id) AS number_of_purchase_sessions,
  AVG(spend.total_session_spend) AS avg_spend_per_session_by_user
FROM
  session_spend AS spend
GROUP BY
  spend.user_pseudo_id
ORDER BY
  avg_spend_per_session_by_user DESC;

2. 所有購買工作階段的平均花費金額:

這項查詢會計算所有使用者每次不重複購物工作階段的總平均支出:

-- Overall average spend per purchase session, across all users and sessions.
WITH
  -- Total purchase value for every (user, session) pair. Only purchase events
  -- that carry a non-NULL integer ga_session_id parameter are counted.
  session_spend AS (
    SELECT
      user_pseudo_id,
      (
        SELECT param.value.int_value
        FROM UNNEST(event_params) AS param
        WHERE param.key = 'ga_session_id'
      ) AS session_id,
      SUM(
        COALESCE(
          (
            -- The 'value' parameter may be stored as int, float or double;
            -- take whichever representation is populated, in that order.
            SELECT
              COALESCE(
                param.value.int_value,
                param.value.float_value,
                param.value.double_value)
            FROM UNNEST(event_params) AS param
            WHERE param.key = 'value'
          ),
          -- Events without a usable 'value' parameter contribute nothing.
          0.0)
      ) AS total_session_spend
    FROM
      -- Replace table name.
      `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
    WHERE
      -- Replace date range.
      _TABLE_SUFFIX BETWEEN '20201101' AND '20210131'
      AND event_name = 'purchase'
      AND EXISTS(
        SELECT 1
        FROM UNNEST(event_params) AS param
        WHERE param.key = 'ga_session_id'
          AND param.value.int_value IS NOT NULL
      )
    GROUP BY
      user_pseudo_id,
      session_id
  )
-- A single summary row: how many purchase sessions there were and their mean spend.
SELECT
  COUNT(spend.session_id) AS total_purchase_sessions,
  AVG(spend.total_session_spend) AS overall_avg_spend_per_session
FROM
  session_spend AS spend;

使用者的最新工作階段 ID 和工作階段編號

以下查詢會提供過去 4 天內,特定使用者清單的最新 ga_session_id 和 ga_session_number。您可以提供 user_pseudo_id 清單或 user_id 清單。

user_pseudo_id

-- Latest ga_session_id and ga_session_number for a given list of
-- user_pseudo_id values, looking at the last 4 days of tables
-- (today and the 3 preceding days in the reporting timezone).

-- Replace timezone. List at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.
DECLARE REPORTING_TIMEZONE STRING DEFAULT 'America/Los_Angeles';

-- Replace list of user_pseudo_id's with ones you want to query.
DECLARE USER_PSEUDO_ID_LIST ARRAY<STRING> DEFAULT [
  '1005355938.1632145814',
  '979622592.1632496588',
  '1101478530.1632831095'
];

-- Returns the `value` struct of one parameter in `params` whose key equals
-- `target_key`, or NULL when no parameter has that key.
CREATE TEMP FUNCTION GetParamValue(params ANY TYPE, target_key STRING)
AS (
  (
    SELECT param.value
    FROM UNNEST(params) AS param
    WHERE param.key = target_key
    LIMIT 1
  )
);

-- Returns the current date in `timezone`, shifted by `date_shift` days and
-- formatted as YYYYMMDD so it can be compared against table suffixes.
CREATE TEMP FUNCTION GetDateSuffix(date_shift INT64, timezone STRING)
AS (
  FORMAT_DATE('%Y%m%d', DATE_ADD(CURRENT_DATE(timezone), INTERVAL date_shift DAY))
);

-- Every row of a user carries the values of that user's most recent event;
-- DISTINCT then collapses those rows to one per user.
SELECT DISTINCT
  user_pseudo_id,
  FIRST_VALUE(GetParamValue(event_params, 'ga_session_id').int_value)
    OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS ga_session_id,
  FIRST_VALUE(GetParamValue(event_params, 'ga_session_number').int_value)
    OVER (PARTITION BY user_pseudo_id ORDER BY event_timestamp DESC) AS ga_session_number
FROM
  -- Replace table name.
  `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE
  -- Only the last 8 characters of the suffix are compared.
  -- NOTE(review): presumably so prefixed suffixes (e.g. intraday tables) also match - confirm.
  RIGHT(_TABLE_SUFFIX, 8) >= GetDateSuffix(-3, REPORTING_TIMEZONE)
  AND RIGHT(_TABLE_SUFFIX, 8) <= GetDateSuffix(0, REPORTING_TIMEZONE)
  AND user_pseudo_id IN UNNEST(USER_PSEUDO_ID_LIST);

user_id

-- Latest ga_session_id and ga_session_number for a given list of user_id
-- values, looking at the last 4 days of tables (today and the 3 preceding
-- days in the reporting timezone).
--
-- Output: one row per (user_id, user_pseudo_id) pair found in the data, so
-- each result can be matched back to an entry of USER_ID_LIST.

-- Replace timezone. List at https://en.wikipedia.org/wiki/List_of_tz_database_time_zones.
DECLARE REPORTING_TIMEZONE STRING DEFAULT 'America/Los_Angeles';

-- Replace list of user_id's with ones you want to query.
DECLARE USER_ID_LIST ARRAY<STRING> DEFAULT ['<user_id_1>', '<user_id_2>', '<user_id_n>'];

-- Returns the `value` struct of one parameter in `params` whose key equals
-- `target_key`, or NULL when no parameter has that key.
CREATE TEMP FUNCTION GetParamValue(params ANY TYPE, target_key STRING)
AS (
  (SELECT `value` FROM UNNEST(params) WHERE key = target_key LIMIT 1)
);

-- Returns the current date in `timezone`, shifted by `date_shift` days and
-- formatted as YYYYMMDD so it can be compared against table suffixes.
CREATE TEMP FUNCTION GetDateSuffix(date_shift INT64, timezone STRING)
AS (
  (SELECT FORMAT_DATE('%Y%m%d', DATE_ADD(CURRENT_DATE(timezone), INTERVAL date_shift DAY)))
);

-- Every row of a (user_id, user_pseudo_id) pair carries the values of that
-- pair's most recent event; DISTINCT then collapses them to one row per pair.
SELECT DISTINCT
  -- Returned so each row can be tied to the user_id that was asked for.
  user_id,
  user_pseudo_id,
  FIRST_VALUE(GetParamValue(event_params, 'ga_session_id').int_value)
    OVER (UserWindow) AS ga_session_id,
  FIRST_VALUE(GetParamValue(event_params, 'ga_session_number').int_value)
    OVER (UserWindow) AS ga_session_number
FROM
  -- Replace table name.
  `bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*`
WHERE
  user_id IN UNNEST(USER_ID_LIST)
  -- Only the last 8 characters of the suffix are compared.
  -- NOTE(review): presumably so prefixed suffixes (e.g. intraday tables) also match - confirm.
  AND RIGHT(_TABLE_SUFFIX, 8)
    BETWEEN GetDateSuffix(-3, REPORTING_TIMEZONE)
    AND GetDateSuffix(0, REPORTING_TIMEZONE)
-- Partition by user_id as well as user_pseudo_id: when two listed users share
-- one user_pseudo_id, each user gets their own latest session instead of
-- whichever user's event happened to be most recent.
WINDOW UserWindow AS (
  PARTITION BY user_id, user_pseudo_id
  ORDER BY event_timestamp DESC
);