From 95f752a271896f0418c85b0090d0ea6df5f9ff73 Mon Sep 17 00:00:00 2001 From: Nick Smith Date: Wed, 20 Jan 2021 22:53:38 -0600 Subject: [PATCH] Nest PF candidates inside jets (only reco so far) --- coffea/nanoevents/methods/nanoaod.py | 12 +++++++++ coffea/nanoevents/schemas/nanoaod.py | 14 +++++++++++ coffea/nanoevents/transforms.py | 37 ++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/coffea/nanoevents/methods/nanoaod.py b/coffea/nanoevents/methods/nanoaod.py index 8a4ab49e1..21e6c3c97 100644 --- a/coffea/nanoevents/methods/nanoaod.py +++ b/coffea/nanoevents/methods/nanoaod.py @@ -298,6 +298,12 @@ def matched_muons(self): def matched_gen(self): return self._events().GenJet._apply_global_index(self.genJetIdxG) + @property + def constituents(self): + if "pFCandsIdxG" not in self.fields: + raise RuntimeError("PF candidates are only available for PFNano") + return self._events().JetPFCands._apply_global_index(self.pFCandsIdxG) + _set_repr_name("Jet") @@ -336,6 +342,12 @@ def subjets(self): def matched_gen(self): return self._events().GenJetAK8._apply_global_index(self.genJetAK8IdxG) + @property + def constituents(self): + if "pFCandsIdxG" not in self.fields: + raise RuntimeError("PF candidates are only available for PFNano") + return self._events().FatJetPFCands._apply_global_index(self.pFCandsIdxG) + _set_repr_name("FatJet") diff --git a/coffea/nanoevents/schemas/nanoaod.py b/coffea/nanoevents/schemas/nanoaod.py index 2bd93f97e..85f99a111 100644 --- a/coffea/nanoevents/schemas/nanoaod.py +++ b/coffea/nanoevents/schemas/nanoaod.py @@ -138,6 +138,13 @@ class NanoAODSchema(BaseSchema): "Jet_electronIdxG": ["Jet_electronIdx1G", "Jet_electronIdx2G"], } """Nested collections, where nesting is accomplished by a fixed-length set of indexers""" + nested_index_items = { + "Jet_pFCandsIdxG": ("Jet_nConstituents", "JetPFCands"), + "FatJet_pFCandsIdxG": ("FatJet_nConstituents", "FatJetPFCands"), + "GenJet_pFCandsIdxG": ("GenJet_nConstituents", 
"GenJetCands"), + "GenFatJet_pFCandsIdxG": ("GenJetAK8_nConstituents", "GenFatJetCands"), + } + """Nested collections, where nesting is accomplished by assuming the target can be unflattened according to a source counts""" special_items = { "GenPart_distinctParentIdxG": ( transforms.distinctParent_form, @@ -207,6 +214,13 @@ def _build_collections(self, branch_forms): [branch_forms[idx] for idx in indexers] ) + # Create nested indexer from n* counts arrays + for name, (local_counts, target) in self.nested_index_items.items(): + if local_counts in branch_forms and "o" + target in branch_forms: + branch_forms[name] = transforms.counts2nestedindex_form( + branch_forms[local_counts], branch_forms["o" + target] + ) + # Create any special arrays for name, (fcn, args) in self.special_items.items(): if all(k in branch_forms for k in args): diff --git a/coffea/nanoevents/transforms.py b/coffea/nanoevents/transforms.py index d5d81cca8..5721e4111 100644 --- a/coffea/nanoevents/transforms.py +++ b/coffea/nanoevents/transforms.py @@ -125,6 +125,43 @@ def local2global(stack): stack.append(out) +def counts2nestedindex_form(local_counts, target_offsets): + if not local_counts["class"].startswith("ListOffset"): + raise RuntimeError + if not target_offsets["class"] == "NumpyArray": + raise RuntimeError + form = { + "class": "ListOffsetArray64", + "offsets": "i64", + "content": copy.deepcopy(local_counts), + } + form["content"]["content"]["itemsize"] = 8 + form["content"]["content"]["primitive"] = "int64" + form["content"]["content"]["parameters"] = {} + key = concat( + local_counts["form_key"], target_offsets["form_key"], "!counts2nestedindex" + ) + form["form_key"] = local_counts["form_key"] + form["content"]["form_key"] = key + form["content"]["content"]["form_key"] = concat(key, "!content") + return form + + +def counts2nestedindex(stack): + """Turn jagged local counts into doubly-jagged global index into a target + + Signature: local_counts,target_offsets,!counts2nestedindex + 
Outputs a jagged array with same axis-0 shape as counts axis-1 + """ + target_offsets = stack.pop() + local_counts = stack.pop() + out = awkward.unflatten( + numpy.arange(target_offsets[-1], dtype=numpy.int64), + awkward.flatten(local_counts), + ) + stack.append(out) + + @numba.njit def _distinctParent_kernel(allpart_parent, allpart_pdg): out = numpy.empty(len(allpart_pdg), dtype=numpy.int64)