// Copyright (c) 2021 The Vulkano developers // Licensed under the Apache License, Version 2.0 // or the MIT // license , // at your option. All files in the project carrying such // notice may not be copied, modified, or distributed except // according to those terms. //! Parsing and analysis utilities for SPIR-V shader binaries. //! //! This can be used to inspect and validate a SPIR-V module at runtime. The `Spirv` type does some //! validation, but you should not assume that code that is read successfully is valid. //! //! For more information about SPIR-V modules, instructions and types, see the //! [SPIR-V specification](https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html). use crate::Version; use ahash::{HashMap, HashMapExt}; use std::{ error::Error, fmt::{Display, Error as FmtError, Formatter}, ops::Range, string::FromUtf8Error, }; // Generated by build.rs include!(concat!(env!("OUT_DIR"), "/spirv_parse.rs")); /// A parsed and analyzed SPIR-V module. #[derive(Clone, Debug)] pub struct Spirv { version: Version, bound: u32, instructions: Vec, ids: HashMap, // Items described in the spec section "Logical Layout of a Module" range_capability: Range, range_extension: Range, range_ext_inst_import: Range, memory_model: usize, range_entry_point: Range, range_execution_mode: Range, range_name: Range, range_decoration: Range, range_global: Range, } impl Spirv { /// Parses a SPIR-V document from a list of words. pub fn new(words: &[u32]) -> Result { if words.len() < 5 { return Err(SpirvError::InvalidHeader); } if words[0] != 0x07230203 { return Err(SpirvError::InvalidHeader); } let version = Version { major: (words[1] & 0x00ff0000) >> 16, minor: (words[1] & 0x0000ff00) >> 8, patch: words[1] & 0x000000ff, }; let bound = words[3]; let instructions = { let mut ret = Vec::new(); let mut rest = &words[5..]; while !rest.is_empty() { let word_count = (rest[0] >> 16) as usize; assert!(word_count >= 1); if rest.len() < word_count { return Err(ParseError { instruction: ret.len(), word: rest.len(), error: ParseErrors::UnexpectedEOF, words: rest.to_owned(), } .into()); } let mut reader = InstructionReader::new(&rest[0..word_count], ret.len()); let instruction = Instruction::parse(&mut reader)?; if !reader.is_empty() { return Err(reader.map_err(ParseErrors::LeftoverOperands).into()); } ret.push(instruction); rest = &rest[word_count..]; } ret }; // It is impossible for a valid SPIR-V file to contain more Ids than instructions, so put // a sane upper limit on the allocation. This prevents a malicious file from causing huge // memory allocations. let mut ids = HashMap::with_capacity(instructions.len().min(bound as usize)); let mut range_capability: Option> = None; let mut range_extension: Option> = None; let mut range_ext_inst_import: Option> = None; let mut range_memory_model: Option> = None; let mut range_entry_point: Option> = None; let mut range_execution_mode: Option> = None; let mut range_name: Option> = None; let mut range_decoration: Option> = None; let mut range_global: Option> = None; let mut in_function = false; fn set_range(range: &mut Option>, index: usize) -> Result<(), SpirvError> { if let Some(range) = range { if range.end != index { return Err(SpirvError::BadLayout { index }); } range.end = index + 1; } else { *range = Some(Range { start: index, end: index + 1, }); } Ok(()) } for (index, instruction) in instructions.iter().enumerate() { if let Some(id) = instruction.result_id() { if u32::from(id) >= bound { return Err(SpirvError::IdOutOfBounds { id, index, bound }); } let members = if let Instruction::TypeStruct { member_types, .. } = instruction { member_types .iter() .map(|_| StructMemberDataIndices::default()) .collect() } else { Vec::new() }; let data = IdDataIndices { index, names: Vec::new(), decorations: Vec::new(), members, }; if let Some(first) = ids.insert(id, data) { return Err(SpirvError::DuplicateId { id, first_index: first.index, second_index: index, }); } } match instruction { Instruction::Capability { .. } => set_range(&mut range_capability, index)?, Instruction::Extension { .. } => set_range(&mut range_extension, index)?, Instruction::ExtInstImport { .. } => set_range(&mut range_ext_inst_import, index)?, Instruction::MemoryModel { .. } => set_range(&mut range_memory_model, index)?, Instruction::EntryPoint { .. } => set_range(&mut range_entry_point, index)?, Instruction::ExecutionMode { .. } | Instruction::ExecutionModeId { .. } => { set_range(&mut range_execution_mode, index)? } Instruction::Name { .. } | Instruction::MemberName { .. } => { set_range(&mut range_name, index)? } Instruction::Decorate { .. } | Instruction::MemberDecorate { .. } | Instruction::DecorationGroup { .. } | Instruction::GroupDecorate { .. } | Instruction::GroupMemberDecorate { .. } | Instruction::DecorateId { .. } | Instruction::DecorateString { .. } | Instruction::MemberDecorateString { .. } => { set_range(&mut range_decoration, index)? } Instruction::TypeVoid { .. } | Instruction::TypeBool { .. } | Instruction::TypeInt { .. } | Instruction::TypeFloat { .. } | Instruction::TypeVector { .. } | Instruction::TypeMatrix { .. } | Instruction::TypeImage { .. } | Instruction::TypeSampler { .. } | Instruction::TypeSampledImage { .. } | Instruction::TypeArray { .. } | Instruction::TypeRuntimeArray { .. } | Instruction::TypeStruct { .. } | Instruction::TypeOpaque { .. } | Instruction::TypePointer { .. } | Instruction::TypeFunction { .. } | Instruction::TypeEvent { .. } | Instruction::TypeDeviceEvent { .. } | Instruction::TypeReserveId { .. } | Instruction::TypeQueue { .. } | Instruction::TypePipe { .. } | Instruction::TypeForwardPointer { .. } | Instruction::TypePipeStorage { .. } | Instruction::TypeNamedBarrier { .. } | Instruction::TypeRayQueryKHR { .. } | Instruction::TypeAccelerationStructureKHR { .. } | Instruction::TypeCooperativeMatrixNV { .. } | Instruction::TypeVmeImageINTEL { .. } | Instruction::TypeAvcImePayloadINTEL { .. } | Instruction::TypeAvcRefPayloadINTEL { .. } | Instruction::TypeAvcSicPayloadINTEL { .. } | Instruction::TypeAvcMcePayloadINTEL { .. } | Instruction::TypeAvcMceResultINTEL { .. } | Instruction::TypeAvcImeResultINTEL { .. } | Instruction::TypeAvcImeResultSingleReferenceStreamoutINTEL { .. } | Instruction::TypeAvcImeResultDualReferenceStreamoutINTEL { .. } | Instruction::TypeAvcImeSingleReferenceStreaminINTEL { .. } | Instruction::TypeAvcImeDualReferenceStreaminINTEL { .. } | Instruction::TypeAvcRefResultINTEL { .. } | Instruction::TypeAvcSicResultINTEL { .. } | Instruction::ConstantTrue { .. } | Instruction::ConstantFalse { .. } | Instruction::Constant { .. } | Instruction::ConstantComposite { .. } | Instruction::ConstantSampler { .. } | Instruction::ConstantNull { .. } | Instruction::ConstantPipeStorage { .. } | Instruction::SpecConstantTrue { .. } | Instruction::SpecConstantFalse { .. } | Instruction::SpecConstant { .. } | Instruction::SpecConstantComposite { .. } | Instruction::SpecConstantOp { .. } => set_range(&mut range_global, index)?, Instruction::Undef { .. } if !in_function => set_range(&mut range_global, index)?, Instruction::Variable { storage_class, .. } if *storage_class != StorageClass::Function => { set_range(&mut range_global, index)? } Instruction::Function { .. } => { in_function = true; } Instruction::Line { .. } | Instruction::NoLine { .. } => { if !in_function { set_range(&mut range_global, index)? } } _ => (), } } let mut spirv = Spirv { version, bound, instructions, ids, range_capability: range_capability.unwrap_or_default(), range_extension: range_extension.unwrap_or_default(), range_ext_inst_import: range_ext_inst_import.unwrap_or_default(), memory_model: if let Some(range) = range_memory_model { if range.end - range.start != 1 { return Err(SpirvError::MemoryModelInvalid); } range.start } else { return Err(SpirvError::MemoryModelInvalid); }, range_entry_point: range_entry_point.unwrap_or_default(), range_execution_mode: range_execution_mode.unwrap_or_default(), range_name: range_name.unwrap_or_default(), range_decoration: range_decoration.unwrap_or_default(), range_global: range_global.unwrap_or_default(), }; for index in spirv.range_name.clone() { match &spirv.instructions[index] { Instruction::Name { target, .. } => { spirv.ids.get_mut(target).unwrap().names.push(index); } Instruction::MemberName { ty, member, .. } => { spirv.ids.get_mut(ty).unwrap().members[*member as usize] .names .push(index); } _ => unreachable!(), } } // First handle all regular decorations, including those targeting decoration groups. for index in spirv.range_decoration.clone() { match &spirv.instructions[index] { Instruction::Decorate { target, .. } | Instruction::DecorateId { target, .. } | Instruction::DecorateString { target, .. } => { spirv.ids.get_mut(target).unwrap().decorations.push(index); } Instruction::MemberDecorate { structure_type: target, member, .. } | Instruction::MemberDecorateString { struct_type: target, member, .. } => { spirv.ids.get_mut(target).unwrap().members[*member as usize] .decorations .push(index); } _ => (), } } // Then, with decoration groups having their lists complete, handle group decorates. for index in spirv.range_decoration.clone() { match &spirv.instructions[index] { Instruction::GroupDecorate { decoration_group, targets, .. } => { let indices = { let data = &spirv.ids[decoration_group]; if !matches!( spirv.instructions[data.index], Instruction::DecorationGroup { .. } ) { return Err(SpirvError::GroupDecorateNotGroup { index }); }; data.decorations.clone() }; for target in targets { spirv .ids .get_mut(target) .unwrap() .decorations .extend(&indices); } } Instruction::GroupMemberDecorate { decoration_group, targets, .. } => { let indices = { let data = &spirv.ids[decoration_group]; if !matches!( spirv.instructions[data.index], Instruction::DecorationGroup { .. } ) { return Err(SpirvError::GroupDecorateNotGroup { index }); }; data.decorations.clone() }; for (target, member) in targets { spirv.ids.get_mut(target).unwrap().members[*member as usize] .decorations .extend(&indices); } } _ => (), } } Ok(spirv) } /// Returns a reference to the instructions in the module. #[inline] pub fn instructions(&self) -> &[Instruction] { &self.instructions } /// Returns the SPIR-V version that the module is compiled for. #[inline] pub fn version(&self) -> Version { self.version } /// Returns the upper bound of `Id`s. All `Id`s should have a numeric value strictly less than /// this value. #[inline] pub fn bound(&self) -> u32 { self.bound } /// Returns information about an `Id`. /// /// # Panics /// /// - Panics if `id` is not defined in this module. This can in theory only happpen if you are /// mixing `Id`s from different modules. #[inline] pub fn id(&self, id: Id) -> IdInfo<'_> { IdInfo { data_indices: &self.ids[&id], instructions: &self.instructions, } } /// Returns an iterator over all `Capability` instructions. #[inline] pub fn iter_capability(&self) -> impl ExactSizeIterator { self.instructions[self.range_capability.clone()].iter() } /// Returns an iterator over all `Extension` instructions. #[inline] pub fn iter_extension(&self) -> impl ExactSizeIterator { self.instructions[self.range_extension.clone()].iter() } /// Returns an iterator over all `ExtInstImport` instructions. #[inline] pub fn iter_ext_inst_import(&self) -> impl ExactSizeIterator { self.instructions[self.range_ext_inst_import.clone()].iter() } /// Returns the `MemoryModel` instruction. #[inline] pub fn memory_model(&self) -> &Instruction { &self.instructions[self.memory_model] } /// Returns an iterator over all `EntryPoint` instructions. #[inline] pub fn iter_entry_point(&self) -> impl ExactSizeIterator { self.instructions[self.range_entry_point.clone()].iter() } /// Returns an iterator over all execution mode instructions. #[inline] pub fn iter_execution_mode(&self) -> impl ExactSizeIterator { self.instructions[self.range_execution_mode.clone()].iter() } /// Returns an iterator over all name debug instructions. #[inline] pub fn iter_name(&self) -> impl ExactSizeIterator { self.instructions[self.range_name.clone()].iter() } /// Returns an iterator over all decoration instructions. #[inline] pub fn iter_decoration(&self) -> impl ExactSizeIterator { self.instructions[self.range_decoration.clone()].iter() } /// Returns an iterator over all global declaration instructions: types, /// constants and global variables. /// /// Note: This can also include `Line` and `NoLine` instructions. #[inline] pub fn iter_global(&self) -> impl ExactSizeIterator { self.instructions[self.range_global.clone()].iter() } } #[derive(Clone, Debug)] struct IdDataIndices { index: usize, names: Vec, decorations: Vec, members: Vec, } #[derive(Clone, Debug, Default)] struct StructMemberDataIndices { names: Vec, decorations: Vec, } /// Information associated with an `Id`. #[derive(Clone, Debug)] pub struct IdInfo<'a> { data_indices: &'a IdDataIndices, instructions: &'a [Instruction], } impl<'a> IdInfo<'a> { /// Returns the instruction that defines this `Id` with a `result_id` operand. #[inline] pub fn instruction(&self) -> &'a Instruction { &self.instructions[self.data_indices.index] } /// Returns an iterator over all name debug instructions that target this `Id`. #[inline] pub fn iter_name(&self) -> impl ExactSizeIterator { let instructions = self.instructions; self.data_indices .names .iter() .map(move |&index| &instructions[index]) } /// Returns an iterator over all decorate instructions, that target this `Id`. This includes any /// decorate instructions that target this `Id` indirectly via a `DecorationGroup`. #[inline] pub fn iter_decoration(&self) -> impl ExactSizeIterator { let instructions = self.instructions; self.data_indices .decorations .iter() .map(move |&index| &instructions[index]) } /// If this `Id` refers to a `TypeStruct`, returns an iterator of information about each member /// of the struct. Empty otherwise. #[inline] pub fn iter_members(&self) -> impl ExactSizeIterator> { let instructions = self.instructions; self.data_indices .members .iter() .map(move |data_indices| StructMemberInfo { data_indices, instructions, }) } } /// Information associated with a member of a `TypeStruct` instruction. #[derive(Clone, Debug)] pub struct StructMemberInfo<'a> { data_indices: &'a StructMemberDataIndices, instructions: &'a [Instruction], } impl<'a> StructMemberInfo<'a> { /// Returns an iterator over all name debug instructions that target this struct member. #[inline] pub fn iter_name(&self) -> impl ExactSizeIterator { let instructions = self.instructions; self.data_indices .names .iter() .map(move |&index| &instructions[index]) } /// Returns an iterator over all decorate instructions that target this struct member. This /// includes any decorate instructions that target this member indirectly via a /// `DecorationGroup`. #[inline] pub fn iter_decoration(&self) -> impl ExactSizeIterator { let instructions = self.instructions; self.data_indices .decorations .iter() .map(move |&index| &instructions[index]) } } /// Used in SPIR-V to refer to the result of another instruction. /// /// Ids are global across a module, and are always assigned by exactly one instruction. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] #[repr(transparent)] pub struct Id(u32); impl From for u32 { #[inline] fn from(id: Id) -> u32 { id.0 } } impl Display for Id { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { write!(f, "%{}", self.0) } } /// Helper type for parsing the words of an instruction. #[derive(Debug)] struct InstructionReader<'a> { words: &'a [u32], next_word: usize, instruction: usize, } impl<'a> InstructionReader<'a> { /// Constructs a new reader from a slice of words for a single instruction, including the opcode /// word. `instruction` is the number of the instruction currently being read, and is used for /// error reporting. fn new(words: &'a [u32], instruction: usize) -> Self { debug_assert!(!words.is_empty()); Self { words, next_word: 0, instruction, } } /// Returns whether the reader has reached the end of the current instruction. fn is_empty(&self) -> bool { self.next_word >= self.words.len() } /// Converts the `ParseErrors` enum to the `ParseError` struct, adding contextual information. fn map_err(&self, error: ParseErrors) -> ParseError { ParseError { instruction: self.instruction, word: self.next_word - 1, // -1 because the word has already been read error, words: self.words.to_owned(), } } /// Returns the next word in the sequence. fn next_u32(&mut self) -> Result { let word = *self.words.get(self.next_word).ok_or(ParseError { instruction: self.instruction, word: self.next_word, // No -1 because we didn't advance yet error: ParseErrors::MissingOperands, words: self.words.to_owned(), })?; self.next_word += 1; Ok(word) } /* /// Returns the next two words as a single `u64`. #[inline] fn next_u64(&mut self) -> Result { Ok(self.next_u32()? as u64 | (self.next_u32()? as u64) << 32) } */ /// Reads a nul-terminated string. fn next_string(&mut self) -> Result { let mut bytes = Vec::new(); loop { let word = self.next_u32()?.to_le_bytes(); if let Some(nul) = word.iter().position(|&b| b == 0) { bytes.extend(&word[0..nul]); break; } else { bytes.extend(word); } } String::from_utf8(bytes).map_err(|err| self.map_err(ParseErrors::FromUtf8Error(err))) } /// Reads all remaining words. fn remainder(&mut self) -> Vec { let vec = self.words[self.next_word..].to_owned(); self.next_word = self.words.len(); vec } } /// Error that can happen when reading a SPIR-V module. #[derive(Clone, Debug)] pub enum SpirvError { BadLayout { index: usize, }, DuplicateId { id: Id, first_index: usize, second_index: usize, }, GroupDecorateNotGroup { index: usize, }, IdOutOfBounds { id: Id, index: usize, bound: u32, }, InvalidHeader, MemoryModelInvalid, ParseError(ParseError), } impl Display for SpirvError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { match self { Self::BadLayout { index } => write!( f, "the instruction at index {} does not follow the logical layout of a module", index, ), Self::DuplicateId { id, first_index, second_index, } => write!( f, "id {} is assigned more than once, by instructions {} and {}", id, first_index, second_index, ), Self::GroupDecorateNotGroup { index } => write!( f, "a GroupDecorate or GroupMemberDecorate instruction at index {} referred to an Id \ that was not a DecorationGroup", index, ), Self::IdOutOfBounds { id, bound, index } => write!( f, "id {}, assigned at instruction {}, is not below the maximum bound {}", id, index, bound, ), Self::InvalidHeader => write!(f, "the SPIR-V module header is invalid"), Self::MemoryModelInvalid => { write!(f, "the MemoryModel instruction is not present exactly once") } Self::ParseError(_) => write!(f, "parse error"), } } } impl Error for SpirvError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { Self::ParseError(err) => Some(err), _ => None, } } } impl From for SpirvError { fn from(err: ParseError) -> Self { Self::ParseError(err) } } /// Error that can happen when parsing SPIR-V instructions into Rust data structures. #[derive(Clone, Debug)] pub struct ParseError { /// The instruction number the error happened at, starting from 0. pub instruction: usize, /// The word from the start of the instruction that the error happened at, starting from 0. pub word: usize, /// The error. pub error: ParseErrors, /// The words of the instruction. pub words: Vec, } impl Display for ParseError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { write!( f, "at instruction {}, word {}: {}", self.instruction, self.word, self.error, ) } } impl Error for ParseError {} /// Individual types of parse error that can happen. #[derive(Clone, Debug)] pub enum ParseErrors { FromUtf8Error(FromUtf8Error), LeftoverOperands, MissingOperands, UnexpectedEOF, UnknownEnumerant(&'static str, u32), UnknownOpcode(u16), UnknownSpecConstantOpcode(u16), } impl Display for ParseErrors { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { match self { Self::FromUtf8Error(_) => write!(f, "invalid UTF-8 in string literal"), Self::LeftoverOperands => write!(f, "unparsed operands remaining"), Self::MissingOperands => write!( f, "the instruction and its operands require more words than are present in the \ instruction", ), Self::UnexpectedEOF => write!(f, "encountered unexpected end of file"), Self::UnknownEnumerant(ty, enumerant) => { write!(f, "invalid enumerant {} for enum {}", enumerant, ty) } Self::UnknownOpcode(opcode) => write!(f, "invalid instruction opcode {}", opcode), Self::UnknownSpecConstantOpcode(opcode) => { write!(f, "invalid spec constant instruction opcode {}", opcode) } } } }