amdgpu_xgmi.c 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. /*
  2. * Copyright 2018 Advanced Micro Devices, Inc.
  3. *
  4. * Permission is hereby granted, free of charge, to any person obtaining a
  5. * copy of this software and associated documentation files (the "Software"),
  6. * to deal in the Software without restriction, including without limitation
  7. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8. * and/or sell copies of the Software, and to permit persons to whom the
  9. * Software is furnished to do so, subject to the following conditions:
  10. *
  11. * The above copyright notice and this permission notice shall be included in
  12. * all copies or substantial portions of the Software.
  13. *
  14. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  17. * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18. * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20. * OTHER DEALINGS IN THE SOFTWARE.
  21. *
  22. *
  23. */
  24. #include <linux/list.h>
  25. #include "amdgpu.h"
  26. #include "amdgpu_psp.h"
  27. static DEFINE_MUTEX(xgmi_mutex);
  28. #define AMDGPU_MAX_XGMI_HIVE 8
  29. #define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
  30. struct amdgpu_hive_info {
  31. uint64_t hive_id;
  32. struct list_head device_list;
  33. };
  34. static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
  35. static unsigned hive_count = 0;
  36. static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
  37. {
  38. int i;
  39. struct amdgpu_hive_info *tmp;
  40. if (!adev->gmc.xgmi.hive_id)
  41. return NULL;
  42. for (i = 0 ; i < hive_count; ++i) {
  43. tmp = &xgmi_hives[i];
  44. if (tmp->hive_id == adev->gmc.xgmi.hive_id)
  45. return tmp;
  46. }
  47. if (i >= AMDGPU_MAX_XGMI_HIVE)
  48. return NULL;
  49. /* initialize new hive if not exist */
  50. tmp = &xgmi_hives[hive_count++];
  51. tmp->hive_id = adev->gmc.xgmi.hive_id;
  52. INIT_LIST_HEAD(&tmp->device_list);
  53. return tmp;
  54. }
  55. int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
  56. {
  57. struct psp_xgmi_topology_info tmp_topology[AMDGPU_MAX_XGMI_DEVICE_PER_HIVE];
  58. struct amdgpu_hive_info *hive;
  59. struct amdgpu_xgmi *entry;
  60. struct amdgpu_device *tmp_adev;
  61. int count = 0, ret = -EINVAL;
  62. if ((adev->asic_type < CHIP_VEGA20) ||
  63. (adev->flags & AMD_IS_APU) )
  64. return 0;
  65. adev->gmc.xgmi.device_id = psp_xgmi_get_device_id(&adev->psp);
  66. adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
  67. memset(&tmp_topology[0], 0, sizeof(tmp_topology));
  68. mutex_lock(&xgmi_mutex);
  69. hive = amdgpu_get_xgmi_hive(adev);
  70. if (!hive)
  71. goto exit;
  72. list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
  73. list_for_each_entry(entry, &hive->device_list, head)
  74. tmp_topology[count++].device_id = entry->device_id;
  75. ret = psp_xgmi_get_topology_info(&adev->psp, count, tmp_topology);
  76. if (ret) {
  77. dev_err(adev->dev,
  78. "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
  79. adev->gmc.xgmi.device_id,
  80. adev->gmc.xgmi.hive_id, ret);
  81. goto exit;
  82. }
  83. /* Each psp need to set the latest topology */
  84. list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
  85. ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
  86. if (ret) {
  87. dev_err(tmp_adev->dev,
  88. "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
  89. tmp_adev->gmc.xgmi.device_id,
  90. tmp_adev->gmc.xgmi.hive_id, ret);
  91. /* To do : continue with some node failed or disable the whole hive */
  92. break;
  93. }
  94. }
  95. if (!ret)
  96. dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
  97. adev->gmc.xgmi.physical_node_id,
  98. adev->gmc.xgmi.hive_id);
  99. exit:
  100. mutex_unlock(&xgmi_mutex);
  101. return ret;
  102. }