The following query can be used to parse out "Thesis" call numbers into the major components (LC Class, LC Class Number, First Cutter, Second Cutter and Year). Thesis call numbers can be very variable, so the query does not work for everything!
Most problems appear to be in getting the second cutter - any suggestions for improvement are gratefully accepted! - JL 11-30-23
-- 11-22-23: This query parses out thesis call numbers into LC class, LC class number, internal year, first cutter and second cutter
-- Does not work well with malformed call numbers or very unusual thesis call number schemes
-- 1. Select thesis holdings records where "Thesis" is part of the call number or call number prefixr
WITH parameters AS
(SELECT
'%Law%'::VARCHAR AS library_name_filter
),
recs AS (
SELECT
ll.library_name,
he.permanent_location_name,
he.holdings_hrid,
he.discovery_suppress,
he.call_number_prefix,
he.call_number
FROM folio_reporting.holdings_ext AS he
LEFT JOIN folio_reporting.locations_libraries AS ll
ON he.permanent_location_id = ll.location_id
WHERE (he.call_number ILIKE '%thesis%' OR he.call_number_prefix ILIKE '%thesis%')
AND (he.discovery_suppress = 'False' OR he.discovery_suppress IS NULL)
AND ll.library_name ILIKE (SELECT library_name_filter FROM parameters)
)
-- 2. Apply parsing formulas to call number to get LC class, LC class number, internal year, first cutter and second cutter
SELECT
recs.library_name,
recs.permanent_location_name,
recs.holdings_hrid,
recs.call_number_prefix,
recs.call_number,
CASE
WHEN recs.call_number ILIKE '%film%' then ' - '
WHEN recs.call_number ILIKE '%thesis%' THEN SUBSTRING (recs.call_number, '\s[A-Za-z]{1,3}') ELSE SUBSTRING (recs.call_number,'[A-Za-z]{1,3}') END AS lc_class,
CASE WHEN recs.call_number ILIKE '%film%' THEN NULL
ELSE
(TRIM (trailing '.' FROM
CASE
WHEN SUBSTRING (recs.call_number,1,1) in ('1','2') THEN NULL
WHEN SUBSTRING (recs.call_number,2,1) = ' ' THEN NULL
ELSE SUBSTRING (recs.call_number,'\d{1,4}\.{0,}\d{0,}')
END))
END AS lc_class_number,
TRIM (CASE WHEN SUBSTRING (recs.call_number,1,1) IN ('1','2')
THEN SUBSTRING (recs.call_number,1,4)
ELSE SUBSTRING (recs.call_number, '\s\d{4}\s{0,1}')
END) AS internal_year,
CASE WHEN recs.library_name NOT LIKE 'Law%' THEN SUBSTRING (recs.call_number, '\d\s{0,1}([A-Z]\d{1,})')
ELSE SUBSTRING (recs.call_number, '(\.[A-Z]\d{1,}[a-z]{0,1})') END AS first_cutter,
CASE
WHEN SUBSTRING (recs.call_number, '\.\s{0,1}[A-Z]\d{1,}') IS NULL
OR
TRIM (SUBSTRING (recs.call_number, '\.\s{0,1}[A-Z]\d{1,}')) = TRIM (SUBSTRING (recs.call_number, '\s{1}\.{0,1}[A-Z]{1}\d{1,}[a-z]{0,1}'))
THEN NULL
ELSE SUBSTRING (recs.call_number, '\s{1}[A-Z]{1}\d{1,}x{0,}a{0,}b{0,}') END AS second_cutter
FROM recs
ORDER BY
permanent_location_name,
lc_class,
CASE
WHEN SUBSTRING (recs.call_number,1,1) in ('1','2') THEN NULL
WHEN SUBSTRING (recs.call_number,2,1) = ' ' THEN NULL
ELSE SUBSTRING (recs.call_number,'\d{1,4}\.{0,}\d{0,}')
END::NUMERIC,
TRIM (CASE WHEN substring (recs.call_number,1,1) IN ('1','2')
THEN substring (recs.call_number,1,4)
ELSE SUBSTRING (recs.call_number, '\s\d{4}\s{0,1}')
END)::INTEGER,
first_cutter,
second_cutter
;