|
@@ -35,9 +35,134 @@ struct OpalHmiEvtNode {
|
|
struct list_head list;
|
|
struct list_head list;
|
|
struct OpalHMIEvent hmi_evt;
|
|
struct OpalHMIEvent hmi_evt;
|
|
};
|
|
};
|
|
|
|
+
|
|
|
|
/*
 * One row of a checkstop decode table.
 *
 * The printing helpers in this file test @xstop_reason as a bit mask against
 * the reason word carried by an HMI event and, on a match, print
 * @unit_failed and @description.
 */
struct xstop_reason {
	uint32_t xstop_reason;		/* bit mask identifying this reason */
	const char *unit_failed;	/* unit name printed as "[Unit: ...]" */
	const char *description;	/* text printed after the unit name */
};
|
|
|
|
+
|
|
static LIST_HEAD(opal_hmi_evt_list);
|
|
static LIST_HEAD(opal_hmi_evt_list);
|
|
static DEFINE_SPINLOCK(opal_hmi_evt_lock);
|
|
static DEFINE_SPINLOCK(opal_hmi_evt_lock);
|
|
|
|
|
|
|
|
+static void print_core_checkstop_reason(const char *level,
|
|
|
|
+ struct OpalHMIEvent *hmi_evt)
|
|
|
|
+{
|
|
|
|
+ int i;
|
|
|
|
+ static const struct xstop_reason xstop_reason[] = {
|
|
|
|
+ { CORE_CHECKSTOP_IFU_REGFILE, "IFU",
|
|
|
|
+ "RegFile core check stop" },
|
|
|
|
+ { CORE_CHECKSTOP_IFU_LOGIC, "IFU", "Logic core check stop" },
|
|
|
|
+ { CORE_CHECKSTOP_PC_DURING_RECOV, "PC",
|
|
|
|
+ "Core checkstop during recovery" },
|
|
|
|
+ { CORE_CHECKSTOP_ISU_REGFILE, "ISU",
|
|
|
|
+ "RegFile core check stop (mapper error)" },
|
|
|
|
+ { CORE_CHECKSTOP_ISU_LOGIC, "ISU", "Logic core check stop" },
|
|
|
|
+ { CORE_CHECKSTOP_FXU_LOGIC, "FXU", "Logic core check stop" },
|
|
|
|
+ { CORE_CHECKSTOP_VSU_LOGIC, "VSU", "Logic core check stop" },
|
|
|
|
+ { CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE, "PC",
|
|
|
|
+ "Recovery in maintenance mode" },
|
|
|
|
+ { CORE_CHECKSTOP_LSU_REGFILE, "LSU",
|
|
|
|
+ "RegFile core check stop" },
|
|
|
|
+ { CORE_CHECKSTOP_PC_FWD_PROGRESS, "PC",
|
|
|
|
+ "Forward Progress Error" },
|
|
|
|
+ { CORE_CHECKSTOP_LSU_LOGIC, "LSU", "Logic core check stop" },
|
|
|
|
+ { CORE_CHECKSTOP_PC_LOGIC, "PC", "Logic core check stop" },
|
|
|
|
+ { CORE_CHECKSTOP_PC_HYP_RESOURCE, "PC",
|
|
|
|
+ "Hypervisor Resource error - core check stop" },
|
|
|
|
+ { CORE_CHECKSTOP_PC_HANG_RECOV_FAILED, "PC",
|
|
|
|
+ "Hang Recovery Failed (core check stop)" },
|
|
|
|
+ { CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED, "PC",
|
|
|
|
+ "Ambiguous Hang Detected (unknown source)" },
|
|
|
|
+ { CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ, "PC",
|
|
|
|
+ "Debug Trigger Error inject" },
|
|
|
|
+ { CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ, "PC",
|
|
|
|
+ "Hypervisor check stop via SPRC/SPRD" },
|
|
|
|
+ };
|
|
|
|
+
|
|
|
|
+ /* Validity check */
|
|
|
|
+ if (!hmi_evt->u.xstop_error.xstop_reason) {
|
|
|
|
+ printk("%s Unknown Core check stop.\n", level);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ printk("%s CPU PIR: %08x\n", level,
|
|
|
|
+ be32_to_cpu(hmi_evt->u.xstop_error.u.pir));
|
|
|
|
+ for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
|
|
|
|
+ if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
|
|
|
|
+ xstop_reason[i].xstop_reason)
|
|
|
|
+ printk("%s [Unit: %-3s] %s\n", level,
|
|
|
|
+ xstop_reason[i].unit_failed,
|
|
|
|
+ xstop_reason[i].description);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void print_nx_checkstop_reason(const char *level,
|
|
|
|
+ struct OpalHMIEvent *hmi_evt)
|
|
|
|
+{
|
|
|
|
+ int i;
|
|
|
|
+ static const struct xstop_reason xstop_reason[] = {
|
|
|
|
+ { NX_CHECKSTOP_SHM_INVAL_STATE_ERR, "DMA & Engine",
|
|
|
|
+ "SHM invalid state error" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1, "DMA & Engine",
|
|
|
|
+ "DMA invalid state error bit 15" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2, "DMA & Engine",
|
|
|
|
+ "DMA invalid state error bit 16" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR, "DMA & Engine",
|
|
|
|
+ "Channel 0 invalid state error" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR, "DMA & Engine",
|
|
|
|
+ "Channel 1 invalid state error" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR, "DMA & Engine",
|
|
|
|
+ "Channel 2 invalid state error" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR, "DMA & Engine",
|
|
|
|
+ "Channel 3 invalid state error" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR, "DMA & Engine",
|
|
|
|
+ "Channel 4 invalid state error" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR, "DMA & Engine",
|
|
|
|
+ "Channel 5 invalid state error" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR, "DMA & Engine",
|
|
|
|
+ "Channel 6 invalid state error" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR, "DMA & Engine",
|
|
|
|
+ "Channel 7 invalid state error" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CRB_UE, "DMA & Engine",
|
|
|
|
+ "UE error on CRB(CSB address, CCB)" },
|
|
|
|
+ { NX_CHECKSTOP_DMA_CRB_SUE, "DMA & Engine",
|
|
|
|
+ "SUE error on CRB(CSB address, CCB)" },
|
|
|
|
+ { NX_CHECKSTOP_PBI_ISN_UE, "PowerBus Interface",
|
|
|
|
+ "CRB Kill ISN received while holding ISN with UE error" },
|
|
|
|
+ };
|
|
|
|
+
|
|
|
|
+ /* Validity check */
|
|
|
|
+ if (!hmi_evt->u.xstop_error.xstop_reason) {
|
|
|
|
+ printk("%s Unknown NX check stop.\n", level);
|
|
|
|
+ return;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ printk("%s NX checkstop on CHIP ID: %x\n", level,
|
|
|
|
+ be32_to_cpu(hmi_evt->u.xstop_error.u.chip_id));
|
|
|
|
+ for (i = 0; i < ARRAY_SIZE(xstop_reason); i++)
|
|
|
|
+ if (be32_to_cpu(hmi_evt->u.xstop_error.xstop_reason) &
|
|
|
|
+ xstop_reason[i].xstop_reason)
|
|
|
|
+ printk("%s [Unit: %-3s] %s\n", level,
|
|
|
|
+ xstop_reason[i].unit_failed,
|
|
|
|
+ xstop_reason[i].description);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static void print_checkstop_reason(const char *level,
|
|
|
|
+ struct OpalHMIEvent *hmi_evt)
|
|
|
|
+{
|
|
|
|
+ switch (hmi_evt->u.xstop_error.xstop_type) {
|
|
|
|
+ case CHECKSTOP_TYPE_CORE:
|
|
|
|
+ print_core_checkstop_reason(level, hmi_evt);
|
|
|
|
+ break;
|
|
|
|
+ case CHECKSTOP_TYPE_NX:
|
|
|
|
+ print_nx_checkstop_reason(level, hmi_evt);
|
|
|
|
+ break;
|
|
|
|
+ case CHECKSTOP_TYPE_UNKNOWN:
|
|
|
|
+ printk("%s Unknown Malfunction Alert.\n", level);
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
+
|
|
static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
|
|
static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
|
|
{
|
|
{
|
|
const char *level, *sevstr, *error_info;
|
|
const char *level, *sevstr, *error_info;
|
|
@@ -95,6 +220,13 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
|
|
(hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
|
|
(hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
|
|
printk("%s TFMR: %016llx\n", level,
|
|
printk("%s TFMR: %016llx\n", level,
|
|
be64_to_cpu(hmi_evt->tfmr));
|
|
be64_to_cpu(hmi_evt->tfmr));
|
|
|
|
+
|
|
|
|
+ if (hmi_evt->version < OpalHMIEvt_V2)
|
|
|
|
+ return;
|
|
|
|
+
|
|
|
|
+ /* OpalHMIEvt_V2 and above provides reason for malfunction alert. */
|
|
|
|
+ if (hmi_evt->type == OpalHMI_ERROR_MALFUNC_ALERT)
|
|
|
|
+ print_checkstop_reason(level, hmi_evt);
|
|
}
|
|
}
|
|
|
|
|
|
static void hmi_event_handler(struct work_struct *work)
|
|
static void hmi_event_handler(struct work_struct *work)
|
|
@@ -103,6 +235,8 @@ static void hmi_event_handler(struct work_struct *work)
|
|
struct OpalHMIEvent *hmi_evt;
|
|
struct OpalHMIEvent *hmi_evt;
|
|
struct OpalHmiEvtNode *msg_node;
|
|
struct OpalHmiEvtNode *msg_node;
|
|
uint8_t disposition;
|
|
uint8_t disposition;
|
|
|
|
+ struct opal_msg msg;
|
|
|
|
+ int unrecoverable = 0;
|
|
|
|
|
|
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
|
|
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
|
|
while (!list_empty(&opal_hmi_evt_list)) {
|
|
while (!list_empty(&opal_hmi_evt_list)) {
|
|
@@ -118,14 +252,53 @@ static void hmi_event_handler(struct work_struct *work)
|
|
|
|
|
|
/*
|
|
/*
|
|
* Check if HMI event has been recovered or not. If not
|
|
* Check if HMI event has been recovered or not. If not
|
|
- * then we can't continue, invoke panic.
|
|
|
|
|
|
+ * then kernel can't continue, we need to panic.
|
|
|
|
+ * But before we do that, display all the HMI event
|
|
|
|
+ * available on the list and set unrecoverable flag to 1.
|
|
*/
|
|
*/
|
|
if (disposition != OpalHMI_DISPOSITION_RECOVERED)
|
|
if (disposition != OpalHMI_DISPOSITION_RECOVERED)
|
|
- panic("Unrecoverable HMI exception");
|
|
|
|
|
|
+ unrecoverable = 1;
|
|
|
|
|
|
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
|
|
spin_lock_irqsave(&opal_hmi_evt_lock, flags);
|
|
}
|
|
}
|
|
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
|
|
spin_unlock_irqrestore(&opal_hmi_evt_lock, flags);
|
|
|
|
+
|
|
|
|
+ if (unrecoverable) {
|
|
|
|
+ int ret;
|
|
|
|
+
|
|
|
|
+ /* Pull all HMI events from OPAL before we panic. */
|
|
|
|
+ while (opal_get_msg(__pa(&msg), sizeof(msg)) == OPAL_SUCCESS) {
|
|
|
|
+ u32 type;
|
|
|
|
+
|
|
|
|
+ type = be32_to_cpu(msg.msg_type);
|
|
|
|
+
|
|
|
|
+ /* skip if not HMI event */
|
|
|
|
+ if (type != OPAL_MSG_HMI_EVT)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ /* HMI event info starts from param[0] */
|
|
|
|
+ hmi_evt = (struct OpalHMIEvent *)&msg.params[0];
|
|
|
|
+ print_hmi_event_info(hmi_evt);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Unrecoverable HMI exception. We need to inform BMC/OCC
|
|
|
|
+ * about this error so that it can collect relevant data
|
|
|
|
+ * for error analysis before rebooting.
|
|
|
|
+ */
|
|
|
|
+ ret = opal_cec_reboot2(OPAL_REBOOT_PLATFORM_ERROR,
|
|
|
|
+ "Unrecoverable HMI exception");
|
|
|
|
+ if (ret == OPAL_UNSUPPORTED) {
|
|
|
|
+ pr_emerg("Reboot type %d not supported\n",
|
|
|
|
+ OPAL_REBOOT_PLATFORM_ERROR);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Fall through and panic if opal_cec_reboot2() returns
|
|
|
|
+ * OPAL_UNSUPPORTED.
|
|
|
|
+ */
|
|
|
|
+ panic("Unrecoverable HMI exception");
|
|
|
|
+ }
|
|
}
|
|
}
|
|
|
|
|
|
static DECLARE_WORK(hmi_event_work, hmi_event_handler);
|
|
static DECLARE_WORK(hmi_event_work, hmi_event_handler);
|