mce_amd.c

/*
 * (c) 2005-2016 Advanced Micro Devices, Inc.
 * Your use of this code is subject to the terms and conditions of the
 * GNU general public license version 2. See "COPYING" or
 * http://www.gnu.org/licenses/gpl.html
 *
 * Written by Jacob Shin - AMD, Inc.
 * Maintained by: Borislav Petkov <bp@alien8.de>
 *
 * All MC4_MISCi registers are shared between cores on a node.
 */
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/kobject.h>
#include <linux/percpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/smp.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/trace/irq_vectors.h>

#define NR_BLOCKS         5
#define THRESHOLD_MAX     0xFFF
#define INT_TYPE_APIC     0x00020000
#define MASK_VALID_HI     0x80000000
#define MASK_CNTP_HI      0x40000000
#define MASK_LOCKED_HI    0x20000000
#define MASK_LVTOFF_HI    0x00F00000
#define MASK_COUNT_EN_HI  0x00080000
#define MASK_INT_TYPE_HI  0x00060000
#define MASK_OVERFLOW_HI  0x00010000
#define MASK_ERR_COUNT_HI 0x00000FFF
#define MASK_BLKPTR_LO    0xFF000000
#define MCG_XBLK_ADDR     0xC0000400

/* Deferred error settings */
#define MSR_CU_DEF_ERR    0xC0000410
#define MASK_DEF_LVTOFF   0x000000F0
#define MASK_DEF_INT_TYPE 0x00000006
#define DEF_LVT_OFF       0x2
#define DEF_INT_TYPE_APIC 0x2

/* Scalable MCA: */

/* Threshold LVT offset is at MSR0xC0000410[15:12] */
#define SMCA_THR_LVT_OFF  0xF000

static const char * const th_names[] = {
        "load_store",
        "insn_fetch",
        "combined_unit",
        "",
        "northbridge",
        "execution_unit",
};

/* Define HWID to IP type mappings for Scalable MCA */
struct amd_hwid amd_hwids[] = {
        [SMCA_F17H_CORE] = { "f17h_core",   0xB0 },
        [SMCA_DF]        = { "data_fabric", 0x2E },
        [SMCA_UMC]       = { "umc",         0x96 },
        [SMCA_PB]        = { "param_block", 0x5 },
        [SMCA_PSP]       = { "psp",         0xFF },
        [SMCA_SMU]       = { "smu",         0x1 },
};
EXPORT_SYMBOL_GPL(amd_hwids);

const char * const amd_core_mcablock_names[] = {
        [SMCA_LS]       = "load_store",
        [SMCA_IF]       = "insn_fetch",
        [SMCA_L2_CACHE] = "l2_cache",
        [SMCA_DE]       = "decode_unit",
        [RES]           = "",
        [SMCA_EX]       = "execution_unit",
        [SMCA_FP]       = "floating_point",
        [SMCA_L3_CACHE] = "l3_cache",
};
EXPORT_SYMBOL_GPL(amd_core_mcablock_names);

const char * const amd_df_mcablock_names[] = {
        [SMCA_CS]  = "coherent_slave",
        [SMCA_PIE] = "pie",
};
EXPORT_SYMBOL_GPL(amd_df_mcablock_names);

static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */

static void amd_threshold_interrupt(void);
static void amd_deferred_error_interrupt(void);

static void default_deferred_error_interrupt(void)
{
        pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR);
}
void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;

/*
 * CPU Initialization
 */

struct thresh_restart {
        struct threshold_block *b;
        int reset;
        int set_lvt_off;
        int lvt_off;
        u16 old_limit;
};

static inline bool is_shared_bank(int bank)
{
        /*
         * Scalable MCA provides for only one core to have access to the MSRs of
         * a shared bank.
         */
        if (mce_flags.smca)
                return false;

        /* Bank 4 is for northbridge reporting and is thus shared */
        return (bank == 4);
}

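/* Map the MISC MSR address of a bank 4 block to its sysfs directory name. */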
static const char *bank4_names(const struct threshold_block *b)
{
        switch (b->address) {
        /* MSR4_MISC0 */
        case 0x00000413:
                return "dram";
        case 0xc0000408:
                return "ht_links";
        case 0xc0000409:
                return "l3_cache";
        default:
                WARN(1, "Funny MSR: 0x%08x\n", b->address);
                return "";
        }
};

static bool lvt_interrupt_supported(unsigned int bank, u32 msr_high_bits)
{
        /*
         * bank 4 supports APIC LVT interrupts implicitly since forever.
         */
        if (bank == 4)
                return true;

        /*
         * IntP: interrupt present; if this bit is set, the thresholding
         * bank can generate APIC LVT interrupts
         */
        return msr_high_bits & BIT(28);
}

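/*
 * Check that the LVT offset reserved in the APIC matches the offset field in
 * the thresholding MSR; complain about broken firmware if it does not.
 */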
static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
{
        int msr = (hi & MASK_LVTOFF_HI) >> 20;

        if (apic < 0) {
                pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
                       b->bank, b->block, b->address, hi, lo);
                return 0;
        }

        if (apic != msr) {
                /*
                 * On SMCA CPUs, LVT offset is programmed at a different MSR, and
                 * the BIOS provides the value. The original field where LVT offset
                 * was set is reserved. Return early here:
                 */
                if (mce_flags.smca)
                        return 0;

                pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
                       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
                       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
                return 0;
        }

        return 1;
};

/* Reprogram MCx_MISC MSR behind this threshold bank. */
static void threshold_restart_bank(void *_tr)
{
        struct thresh_restart *tr = _tr;
        u32 hi, lo;

        rdmsr(tr->b->address, lo, hi);

        if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
                tr->reset = 1; /* limit cannot be lower than err count */

        if (tr->reset) { /* reset err count and overflow bit */
                hi =
                    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
                    (THRESHOLD_MAX - tr->b->threshold_limit);
        } else if (tr->old_limit) { /* change limit w/o reset */
                int new_count = (hi & THRESHOLD_MAX) +
                    (tr->old_limit - tr->b->threshold_limit);

                hi = (hi & ~MASK_ERR_COUNT_HI) |
                    (new_count & THRESHOLD_MAX);
        }

        /* clear IntType */
        hi &= ~MASK_INT_TYPE_HI;

        if (!tr->b->interrupt_capable)
                goto done;

        if (tr->set_lvt_off) {
                if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
                        /* set new lvt offset */
                        hi &= ~MASK_LVTOFF_HI;
                        hi |= tr->lvt_off << 20;
                }
        }

        if (tr->b->interrupt_enable)
                hi |= INT_TYPE_APIC;

done:
        hi |= MASK_COUNT_EN_HI;
        wrmsr(tr->b->address, lo, hi);
}

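/* Initialize a block with the maximum threshold and program its LVT offset. */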
static void mce_threshold_block_init(struct threshold_block *b, int offset)
{
        struct thresh_restart tr = {
                .b = b,
                .set_lvt_off = 1,
                .lvt_off = offset,
        };

        b->threshold_limit = THRESHOLD_MAX;
        threshold_restart_bank(&tr);
};

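/*
 * Reserve an extended APIC LVT entry for the given vector if none has been
 * reserved yet; return the LVT offset that is in use afterwards.
 */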
static int setup_APIC_mce_threshold(int reserved, int new)
{
        if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
                                              APIC_EILVT_MSG_FIX, 0))
                return new;

        return reserved;
}

static int setup_APIC_deferred_error(int reserved, int new)
{
        if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR,
                                              APIC_EILVT_MSG_FIX, 0))
                return new;

        return reserved;
}

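/*
 * Enable the deferred error APIC interrupt: take the LVT offset from
 * MSR_CU_DEF_ERR (fixing it up if BIOS left it unset), reserve the LVT entry
 * and switch to the AMD-specific deferred error handler.
 */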
static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
{
        u32 low = 0, high = 0;
        int def_offset = -1, def_new;

        if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high))
                return;

        def_new = (low & MASK_DEF_LVTOFF) >> 4;
        if (!(low & MASK_DEF_LVTOFF)) {
                pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n");
                def_new = DEF_LVT_OFF;
                low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4);
        }

        def_offset = setup_APIC_deferred_error(def_offset, def_new);
        if ((def_offset == def_new) &&
            (deferred_error_int_vector != amd_deferred_error_interrupt))
                deferred_error_int_vector = amd_deferred_error_interrupt;

        low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC;
        wrmsr(MSR_CU_DEF_ERR, low, high);
}

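/*
 * Return the MSR address of the MISC register for the given (bank, block)
 * pair, or 0 if that block does not exist.
 */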
static u32 get_block_address(u32 current_addr, u32 low, u32 high,
                             unsigned int bank, unsigned int block)
{
        u32 addr = 0, offset = 0;

        if (mce_flags.smca) {
                if (!block) {
                        addr = MSR_AMD64_SMCA_MCx_MISC(bank);
                } else {
                        /*
                         * For SMCA enabled processors, BLKPTR field of the
                         * first MISC register (MCx_MISC0) indicates presence of
                         * additional MISC register set (MISC1-4).
                         */
                        u32 low, high;

                        if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
                                return addr;

                        if (!(low & MCI_CONFIG_MCAX))
                                return addr;

                        if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
                            (low & MASK_BLKPTR_LO))
                                addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
                }
                return addr;
        }

        /* Fall back to method we used for older processors: */
        switch (block) {
        case 0:
                addr = msr_ops.misc(bank);
                break;
        case 1:
                offset = ((low & MASK_BLKPTR_LO) >> 21);
                if (offset)
                        addr = MCG_XBLK_ADDR + offset;
                break;
        default:
                addr = ++current_addr;
        }
        return addr;
}

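/*
 * Set up a single threshold block: note the bank in this CPU's bank_map,
 * configure the SMCA MCA_CONFIG bits where applicable, reserve the
 * thresholding LVT offset, install the AMD threshold interrupt handler and
 * program the block itself.
 */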
static int
prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
                        int offset, u32 misc_high)
{
        unsigned int cpu = smp_processor_id();
        u32 smca_low, smca_high, smca_addr;
        struct threshold_block b;
        int new;

        if (!block)
                per_cpu(bank_map, cpu) |= (1 << bank);

        memset(&b, 0, sizeof(b));
        b.cpu = cpu;
        b.bank = bank;
        b.block = block;
        b.address = addr;
        b.interrupt_capable = lvt_interrupt_supported(bank, misc_high);

        if (!b.interrupt_capable)
                goto done;

        b.interrupt_enable = 1;

        if (!mce_flags.smca) {
                new = (misc_high & MASK_LVTOFF_HI) >> 20;
                goto set_offset;
        }

        smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);

        if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
                /*
                 * OS is required to set the MCAX bit to acknowledge that it is
                 * now using the new MSR ranges and new registers under each
                 * bank. It also means that the OS will configure deferred
                 * errors in the new MCx_CONFIG register. If the bit is not set,
                 * uncorrectable errors will cause a system panic.
                 *
                 * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the MSR.)
                 */
                smca_high |= BIT(0);

                /*
                 * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
                 * registers with the option of additionally logging to
                 * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
                 *
                 * This bit is usually set by BIOS to retain the old behavior
                 * for OSes that don't use the new registers. Linux supports the
                 * new registers so let's disable that additional logging here.
                 *
                 * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
                 * portion of the MSR).
                 */
                smca_high &= ~BIT(2);

                wrmsr(smca_addr, smca_low, smca_high);
        }

        /* Gather LVT offset for thresholding: */
        if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
                goto out;

        new = (smca_low & SMCA_THR_LVT_OFF) >> 12;

set_offset:
        offset = setup_APIC_mce_threshold(offset, new);

        if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt))
                mce_threshold_vector = amd_threshold_interrupt;

done:
        mce_threshold_block_init(&b, offset);

out:
        return offset;
}

/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
        u32 low = 0, high = 0, address = 0;
        unsigned int bank, block;
        int offset = -1;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                for (block = 0; block < NR_BLOCKS; ++block) {
                        address = get_block_address(address, low, high, bank, block);
                        if (!address)
                                break;

                        if (rdmsr_safe(address, &low, &high))
                                break;

                        if (!(high & MASK_VALID_HI))
                                continue;

                        if (!(high & MASK_CNTP_HI) ||
                            (high & MASK_LOCKED_HI))
                                continue;

                        offset = prepare_threshold_block(bank, block, address, offset, high);
                }
        }

        if (mce_flags.succor)
                deferred_error_interrupt_enable(c);
}

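/*
 * Read the error from the appropriate status MSR (MCA_DESTAT for SMCA
 * deferred errors), log it as a struct mce and clear the status register.
 */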
static void
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{
        u32 msr_status = msr_ops.status(bank);
        u32 msr_addr = msr_ops.addr(bank);
        struct mce m;
        u64 status;

        WARN_ON_ONCE(deferred_err && threshold_err);

        if (deferred_err && mce_flags.smca) {
                msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
                msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
        }

        rdmsrl(msr_status, status);

        if (!(status & MCI_STATUS_VAL))
                return;

        mce_setup(&m);

        m.status = status;
        m.bank = bank;

        if (threshold_err)
                m.misc = misc;

        if (m.status & MCI_STATUS_ADDRV)
                rdmsrl(msr_addr, m.addr);

        mce_log(&m);

        wrmsrl(msr_status, 0);
}

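/*
 * Low-level entry points for the deferred error APIC interrupt; the traced
 * variant additionally emits the deferred_error_apic tracepoints.
 */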
static inline void __smp_deferred_error_interrupt(void)
{
        inc_irq_stat(irq_deferred_error_count);
        deferred_error_int_vector();
}

asmlinkage __visible void smp_deferred_error_interrupt(void)
{
        entering_irq();
        __smp_deferred_error_interrupt();
        exiting_ack_irq();
}

asmlinkage __visible void smp_trace_deferred_error_interrupt(void)
{
        entering_irq();
        trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
        __smp_deferred_error_interrupt();
        trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
        exiting_ack_irq();
}

/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
        unsigned int bank;
        u32 msr_status;
        u64 status;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
                                              : msr_ops.status(bank);

                rdmsrl(msr_status, status);

                if (!(status & MCI_STATUS_VAL) ||
                    !(status & MCI_STATUS_DEFERRED))
                        continue;

                __log_error(bank, true, false, 0);
                break;
        }
}

/*
 * APIC Interrupt Handler
 */

/*
 * threshold interrupt handler will service THRESHOLD_APIC_VECTOR.
 * the interrupt goes off when error_count reaches threshold_limit.
 * the handler will simply log mcelog w/ software defined bank number.
 */
static void amd_threshold_interrupt(void)
{
        u32 low = 0, high = 0, address = 0;
        int cpu = smp_processor_id();
        unsigned int bank, block;

        /* assume first bank caused it */
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                for (block = 0; block < NR_BLOCKS; ++block) {
                        address = get_block_address(address, low, high, bank, block);
                        if (!address)
                                break;

                        if (rdmsr_safe(address, &low, &high))
                                break;

                        if (!(high & MASK_VALID_HI)) {
                                if (block)
                                        continue;
                                else
                                        break;
                        }

                        if (!(high & MASK_CNTP_HI) ||
                            (high & MASK_LOCKED_HI))
                                continue;

                        /*
                         * Log the machine check that caused the threshold
                         * event.
                         */
                        if (high & MASK_OVERFLOW_HI)
                                goto log;
                }
        }
        return;

log:
        __log_error(bank, false, true, ((u64)high << 32) | low);
}

/*
 * Sysfs Interface
 */

struct threshold_attr {
        struct attribute attr;
        ssize_t (*show) (struct threshold_block *, char *);
        ssize_t (*store) (struct threshold_block *, const char *, size_t count);
};

#define SHOW_FIELDS(name)                                               \
static ssize_t show_ ## name(struct threshold_block *b, char *buf)     \
{                                                                       \
        return sprintf(buf, "%lu\n", (unsigned long) b->name);         \
}
SHOW_FIELDS(interrupt_enable)
SHOW_FIELDS(threshold_limit)

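/*
 * For instance, SHOW_FIELDS(threshold_limit) above expands to:
 *
 *      static ssize_t show_threshold_limit(struct threshold_block *b, char *buf)
 *      {
 *              return sprintf(buf, "%lu\n", (unsigned long) b->threshold_limit);
 *      }
 */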
static ssize_t
store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
{
        struct thresh_restart tr;
        unsigned long new;

        if (!b->interrupt_capable)
                return -EINVAL;

        if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;

        b->interrupt_enable = !!new;

        memset(&tr, 0, sizeof(tr));
        tr.b = b;

        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

        return size;
}

static ssize_t
store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
{
        struct thresh_restart tr;
        unsigned long new;

        if (kstrtoul(buf, 0, &new) < 0)
                return -EINVAL;

        if (new > THRESHOLD_MAX)
                new = THRESHOLD_MAX;
        if (new < 1)
                new = 1;

        memset(&tr, 0, sizeof(tr));
        tr.old_limit = b->threshold_limit;
        b->threshold_limit = new;
        tr.b = b;

        smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);

        return size;
}

static ssize_t show_error_count(struct threshold_block *b, char *buf)
{
        u32 lo, hi;

        rdmsr_on_cpu(b->cpu, b->address, &lo, &hi);

        return sprintf(buf, "%u\n", ((hi & THRESHOLD_MAX) -
                                     (THRESHOLD_MAX - b->threshold_limit)));
}

static struct threshold_attr error_count = {
        .attr = {.name = __stringify(error_count), .mode = 0444 },
        .show = show_error_count,
};

#define RW_ATTR(val)                                                    \
static struct threshold_attr val = {                                   \
        .attr = {.name = __stringify(val), .mode = 0644 },             \
        .show = show_## val,                                            \
        .store = store_## val,                                          \
};

RW_ATTR(interrupt_enable);
RW_ATTR(threshold_limit);

static struct attribute *default_attrs[] = {
        &threshold_limit.attr,
        &error_count.attr,
        NULL, /* possibly interrupt_enable if supported, see below */
        NULL,
};

#define to_block(k) container_of(k, struct threshold_block, kobj)
#define to_attr(a) container_of(a, struct threshold_attr, attr)

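/*
 * Generic sysfs dispatchers: recover the threshold block and attribute from
 * the kobject/attribute pointers and call the attribute's handler.
 */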
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
        struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;

        ret = a->show ? a->show(b, buf) : -EIO;

        return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
                     const char *buf, size_t count)
{
        struct threshold_block *b = to_block(kobj);
        struct threshold_attr *a = to_attr(attr);
        ssize_t ret;

        ret = a->store ? a->store(b, buf, count) : -EIO;

        return ret;
}

static const struct sysfs_ops threshold_ops = {
        .show = show,
        .store = store,
};

static struct kobj_type threshold_ktype = {
        .sysfs_ops = &threshold_ops,
        .default_attrs = default_attrs,
};

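/*
 * Allocate and register a threshold block for (bank, block), then recurse
 * into the following blocks of the same bank until an absent one is found.
 */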
static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
                                     unsigned int block, u32 address)
{
        struct threshold_block *b = NULL;
        u32 low, high;
        int err;

        if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS))
                return 0;

        if (rdmsr_safe_on_cpu(cpu, address, &low, &high))
                return 0;

        if (!(high & MASK_VALID_HI)) {
                if (block)
                        goto recurse;
                else
                        return 0;
        }

        if (!(high & MASK_CNTP_HI) ||
            (high & MASK_LOCKED_HI))
                goto recurse;

        b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL);
        if (!b)
                return -ENOMEM;

        b->block = block;
        b->bank = bank;
        b->cpu = cpu;
        b->address = address;
        b->interrupt_enable = 0;
        b->interrupt_capable = lvt_interrupt_supported(bank, high);
        b->threshold_limit = THRESHOLD_MAX;

        if (b->interrupt_capable) {
                threshold_ktype.default_attrs[2] = &interrupt_enable.attr;
                b->interrupt_enable = 1;
        } else {
                threshold_ktype.default_attrs[2] = NULL;
        }

        INIT_LIST_HEAD(&b->miscj);

        if (per_cpu(threshold_banks, cpu)[bank]->blocks) {
                list_add(&b->miscj,
                         &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj);
        } else {
                per_cpu(threshold_banks, cpu)[bank]->blocks = b;
        }

        err = kobject_init_and_add(&b->kobj, &threshold_ktype,
                                   per_cpu(threshold_banks, cpu)[bank]->kobj,
                                   (bank == 4 ? bank4_names(b) : th_names[bank]));
        if (err)
                goto out_free;
recurse:
        address = get_block_address(address, low, high, bank, ++block);
        if (!address)
                return 0;

        err = allocate_threshold_blocks(cpu, bank, block, address);
        if (err)
                goto out_free;

        if (b)
                kobject_uevent(&b->kobj, KOBJ_ADD);

        return err;

out_free:
        if (b) {
                kobject_put(&b->kobj);
                list_del(&b->miscj);
                kfree(b);
        }
        return err;
}

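/*
 * Re-add the block kobjects of an already initialized shared bank under its
 * bank kobject, rolling the additions back on failure.
 */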
static int __threshold_add_blocks(struct threshold_bank *b)
{
        struct list_head *head = &b->blocks->miscj;
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;
        int err = 0;

        err = kobject_add(&b->blocks->kobj, b->kobj, b->blocks->kobj.name);
        if (err)
                return err;

        list_for_each_entry_safe(pos, tmp, head, miscj) {
                err = kobject_add(&pos->kobj, b->kobj, pos->kobj.name);
                if (err) {
                        list_for_each_entry_safe_reverse(pos, tmp, head, miscj)
                                kobject_del(&pos->kobj);

                        return err;
                }
        }
        return err;
}

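/*
 * Create the sysfs directory and blocks for one bank on a CPU. The shared
 * northbridge bank (bank 4) is created once per node and only linked into
 * the directories of the other CPUs on that node.
 */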
static int threshold_create_bank(unsigned int cpu, unsigned int bank)
{
        struct device *dev = per_cpu(mce_device, cpu);
        struct amd_northbridge *nb = NULL;
        struct threshold_bank *b = NULL;
        const char *name = th_names[bank];
        int err = 0;

        if (is_shared_bank(bank)) {
                nb = node_to_amd_nb(amd_get_nb_id(cpu));

                /* threshold descriptor already initialized on this node? */
                if (nb && nb->bank4) {
                        /* yes, use it */
                        b = nb->bank4;
                        err = kobject_add(b->kobj, &dev->kobj, name);
                        if (err)
                                goto out;

                        per_cpu(threshold_banks, cpu)[bank] = b;
                        atomic_inc(&b->cpus);

                        err = __threshold_add_blocks(b);

                        goto out;
                }
        }

        b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL);
        if (!b) {
                err = -ENOMEM;
                goto out;
        }

        b->kobj = kobject_create_and_add(name, &dev->kobj);
        if (!b->kobj) {
                err = -EINVAL;
                goto out_free;
        }

        per_cpu(threshold_banks, cpu)[bank] = b;

        if (is_shared_bank(bank)) {
                atomic_set(&b->cpus, 1);

                /* nb is already initialized, see above */
                if (nb) {
                        WARN_ON(nb->bank4);

                        nb->bank4 = b;
                }
        }

        err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
        if (!err)
                goto out;

out_free:
        kfree(b);

out:
        return err;
}

/* create dir/files for all valid threshold banks */
static int threshold_create_device(unsigned int cpu)
{
        unsigned int bank;
        struct threshold_bank **bp;
        int err = 0;

        bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
                     GFP_KERNEL);
        if (!bp)
                return -ENOMEM;

        per_cpu(threshold_banks, cpu) = bp;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                err = threshold_create_bank(cpu, bank);
                if (err)
                        return err;
        }

        return err;
}

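/* Free all threshold blocks hanging off a bank's ->blocks list. */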
static void deallocate_threshold_block(unsigned int cpu,
                                       unsigned int bank)
{
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;
        struct threshold_bank *head = per_cpu(threshold_banks, cpu)[bank];

        if (!head)
                return;

        list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) {
                kobject_put(&pos->kobj);
                list_del(&pos->miscj);
                kfree(pos);
        }

        kfree(per_cpu(threshold_banks, cpu)[bank]->blocks);
        per_cpu(threshold_banks, cpu)[bank]->blocks = NULL;
}

static void __threshold_remove_blocks(struct threshold_bank *b)
{
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;

        kobject_del(b->kobj);

        list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
                kobject_del(&pos->kobj);
}

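/*
 * Remove one bank. For a shared bank, only unlink this CPU's view unless it
 * is the last user on the node, in which case the blocks and the bank itself
 * are freed.
 */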
static void threshold_remove_bank(unsigned int cpu, int bank)
{
        struct amd_northbridge *nb;
        struct threshold_bank *b;

        b = per_cpu(threshold_banks, cpu)[bank];
        if (!b)
                return;

        if (!b->blocks)
                goto free_out;

        if (is_shared_bank(bank)) {
                if (!atomic_dec_and_test(&b->cpus)) {
                        __threshold_remove_blocks(b);
                        per_cpu(threshold_banks, cpu)[bank] = NULL;
                        return;
                } else {
                        /*
                         * the last CPU on this node using the shared bank is
                         * going away, remove that bank now.
                         */
                        nb = node_to_amd_nb(amd_get_nb_id(cpu));
                        nb->bank4 = NULL;
                }
        }

        deallocate_threshold_block(cpu, bank);

free_out:
        kobject_del(b->kobj);
        kobject_put(b->kobj);
        kfree(b);
        per_cpu(threshold_banks, cpu)[bank] = NULL;
}

static void threshold_remove_device(unsigned int cpu)
{
        unsigned int bank;

        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (!(per_cpu(bank_map, cpu) & (1 << bank)))
                        continue;
                threshold_remove_bank(cpu, bank);
        }
        kfree(per_cpu(threshold_banks, cpu));
}

/* get notified when a cpu comes on/off */
static void
amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu)
{
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                threshold_create_device(cpu);
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                threshold_remove_device(cpu);
                break;
        default:
                break;
        }
}

static __init int threshold_init_device(void)
{
        unsigned lcpu = 0;

        /* to hit CPUs online before the notifier is up */
        for_each_online_cpu(lcpu) {
                int err = threshold_create_device(lcpu);

                if (err)
                        return err;
        }
        threshold_cpu_callback = amd_64_threshold_cpu_callback;

        return 0;
}

/*
 * there are 3 funcs which need to be _initcalled in a logic sequence:
 * 1. xen_late_init_mcelog
 * 2. mcheck_init_device
 * 3. threshold_init_device
 *
 * xen_late_init_mcelog must register xen_mce_chrdev_device before
 * native mce_chrdev_device registration if running under xen platform;
 *
 * mcheck_init_device should be inited before threshold_init_device to
 * initialize mce_device, otherwise a NULL ptr dereference will cause panic.
 *
 * so we use following _initcalls
 * 1. device_initcall(xen_late_init_mcelog);
 * 2. device_initcall_sync(mcheck_init_device);
 * 3. late_initcall(threshold_init_device);
 *
 * when running under xen, the initcall order is 1,2,3;
 * on baremetal, we skip 1 and we do only 2 and 3.
 */
late_initcall(threshold_init_device);