...
-- 11-22-23: This query parses out thesis call numbers into LC class, LC Lc class number, internal year, first cutter and second cutter
-- Does not work well with malformed call numbers or very unusual thesis call number schemes
-- 11-30-23: updated query to work with Law theses and Thesis Film
-- 1. Select thesis holdings records where "Thesis" is part of the call number core or call number prefixprefixr
WITH parameters AS
(SELECT
'%Law%%Mann%'::VARCHAR AS library_name_filter
),
recs AS (
SELECT
ll.library_name,
he.permanent_location_name,
he.holdings_hrid,
he.discovery_suppress,
he.call_number_prefix,
he.call_number
FROM folio_reporting.holdings_ext AS he
LEFT JOIN folio_reporting.locations_libraries AS ll
ON he.permanent_location_id = ll.location_id
WHERE (he.call_number ILIKE '%thesis%' OR he.call_number_prefix ILIKE '%thesis%')
AND (he.discovery_suppress = 'False' OR he.discovery_suppress IS NULL)
AND ll.library_name ILIKE (SELECT library_name_filter FROM parameters)
)
-- 2. Apply parsing formulas to call number to get LC class, LC class number, internal year, first cutter and second cutter
SELECT
recs.library_name,
recs.permanent_location_name,
recs.holdings_hrid,
recs.call_number_prefix,
recs.call_number,
-- get LC class
CASE
WHEN recs.call_number ILIKE '%film%' then ' - '
WHEN recs.call_number ILIKE '%thesis%' THEN SUBSTRING (recs.call_number, '\s[A-Za-z]{1,3}') ELSE SUBSTRING (recs.call_number,'[A-Za-z]{1,3}') END AS lc_class,
-- get LC class number
CASE WHEN recs.call_number ILIKE '%film%' THEN NULL
ELSE
(TRIM (trailing '.' FROM
CASE
WHEN SUBSTRING (recs.call_number,1,1) in ('1','2') THEN NULL
WHEN SUBSTRING (recs.call_number,2,1) = ' ' THEN NULL
ELSE SUBSTRING (recs.call_number,'\d{1,4}\.{0,}\d{0,}')
END))
END AS lc_class_number,
-- get internal year
TRIM (CASE WHEN SUBSTRING (recs.call_number,1,1) IN ('1','2')
THEN SUBSTRING (recs.call_number,1,4)
ELSE SUBSTRING (recs.call_number, '\s\d{4}\s{0,1}')
END) AS internal_year,
-- get first cutter
CASE WHEN recs.library_name NOT LIKE 'Law%' THEN SUBSTRING (recs.call_number, '\d\s{0,1}([A-Z]\d{1,})')
ELSE SUBSTRING (recs.call_number, '(\.[A-Z]\d{1,}[a-z]{0,1})') END AS first_cutter,
-- get second cutter
CASE
WHEN SUBSTRING (recs.call_number, '\.\s{0,1}[A-Z]\d{1,}') IS NULL
OR
TRIM (SUBSTRING (recs.call_number, '\.\s{0,1}[A-Z]\d{1,}')) = TRIM (SUBSTRING (recs.call_number, '\s{1}\.{0,1}[A-Z]{1}\d{1,}[a-z]{0,1}'))
THEN NULL
ELSE SUBSTRING (recs.call_number, '\s{1}[A-Z]{1}\d{1,}x{0,}a{0,}b{0,}') END AS second_cutter
FROM recs
ORDER BY
permanent_location_name,
-- then order by LC class
lc_class,
-- then order by lc class number (using the entire CASE WHEN statement above)
CASE
WHEN SUBSTRING (recs.call_number,1,1) in ('1','2') THEN NULL
WHEN SUBSTRING (recs.call_number,2,1) = ' ' THEN NULL
ELSE SUBSTRING (recs.call_number,'\d{1,4}\.{0,}\d{0,}')
END::NUMERIC,
-- then order by internal year (using the entire CASE WHEN statement)
TRIM (CASE WHEN substring (recs.call_number,1,1) IN ('1','2')
THEN substring (recs.call_number,1,4)
ELSE SUBSTRING (recs.call_number, '\s\d{4}\s{0,1}')
END)::INTEGER,
-- then order by first cutter, then second cutter
first_cutter,
second_cutter,
recs.call_number
;