|
@@ -39,7 +39,7 @@
|
|
* the PDC INTRIGUE calls. This is done to eliminate bugs introduced
|
|
* the PDC INTRIGUE calls. This is done to eliminate bugs introduced
|
|
* in various PDC revisions. The code is much more maintainable
|
|
* in various PDC revisions. The code is much more maintainable
|
|
* and reliable this way vs having to debug on every version of PDC
|
|
* and reliable this way vs having to debug on every version of PDC
|
|
- * on every box.
|
|
|
|
|
|
+ * on every box.
|
|
*/
|
|
*/
|
|
|
|
|
|
#include <linux/capability.h>
|
|
#include <linux/capability.h>
|
|
@@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr);
|
|
static int perf_release(struct inode *inode, struct file *file);
|
|
static int perf_release(struct inode *inode, struct file *file);
|
|
static int perf_open(struct inode *inode, struct file *file);
|
|
static int perf_open(struct inode *inode, struct file *file);
|
|
static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
|
|
static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
|
|
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,
|
|
|
|
- loff_t *ppos);
|
|
|
|
|
|
+static ssize_t perf_write(struct file *file, const char __user *buf,
|
|
|
|
+ size_t count, loff_t *ppos);
|
|
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
|
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
|
static void perf_start_counters(void);
|
|
static void perf_start_counters(void);
|
|
static int perf_stop_counters(uint32_t *raddr);
|
|
static int perf_stop_counters(uint32_t *raddr);
|
|
@@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void);
|
|
/*
|
|
/*
|
|
* configure:
|
|
* configure:
|
|
*
|
|
*
|
|
- * Configure the cpu with a given data image. First turn off the counters,
|
|
|
|
|
|
+ * Configure the cpu with a given data image. First turn off the counters,
|
|
* then download the image, then turn the counters back on.
|
|
* then download the image, then turn the counters back on.
|
|
*/
|
|
*/
|
|
static int perf_config(uint32_t *image_ptr)
|
|
static int perf_config(uint32_t *image_ptr)
|
|
@@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr)
|
|
error = perf_stop_counters(raddr);
|
|
error = perf_stop_counters(raddr);
|
|
if (error != 0) {
|
|
if (error != 0) {
|
|
printk("perf_config: perf_stop_counters = %ld\n", error);
|
|
printk("perf_config: perf_stop_counters = %ld\n", error);
|
|
- return -EINVAL;
|
|
|
|
|
|
+ return -EINVAL;
|
|
}
|
|
}
|
|
|
|
|
|
printk("Preparing to write image\n");
|
|
printk("Preparing to write image\n");
|
|
@@ -242,7 +242,7 @@ printk("Preparing to write image\n");
|
|
error = perf_write_image((uint64_t *)image_ptr);
|
|
error = perf_write_image((uint64_t *)image_ptr);
|
|
if (error != 0) {
|
|
if (error != 0) {
|
|
printk("perf_config: DOWNLOAD = %ld\n", error);
|
|
printk("perf_config: DOWNLOAD = %ld\n", error);
|
|
- return -EINVAL;
|
|
|
|
|
|
+ return -EINVAL;
|
|
}
|
|
}
|
|
|
|
|
|
printk("Preparing to start counters\n");
|
|
printk("Preparing to start counters\n");
|
|
@@ -254,7 +254,7 @@ printk("Preparing to start counters\n");
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
- * Open the device and initialize all of its memory. The device is only
|
|
|
|
|
|
+ * Open the device and initialize all of its memory. The device is only
|
|
* opened once, but can be "queried" by multiple processes that know its
|
|
* opened once, but can be "queried" by multiple processes that know its
|
|
* file descriptor.
|
|
* file descriptor.
|
|
*/
|
|
*/
|
|
@@ -298,19 +298,19 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t
|
|
* called on the processor that the download should happen
|
|
* called on the processor that the download should happen
|
|
* on.
|
|
* on.
|
|
*/
|
|
*/
|
|
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,
|
|
|
|
- loff_t *ppos)
|
|
|
|
|
|
+static ssize_t perf_write(struct file *file, const char __user *buf,
|
|
|
|
+ size_t count, loff_t *ppos)
|
|
{
|
|
{
|
|
size_t image_size;
|
|
size_t image_size;
|
|
uint32_t image_type;
|
|
uint32_t image_type;
|
|
uint32_t interface_type;
|
|
uint32_t interface_type;
|
|
uint32_t test;
|
|
uint32_t test;
|
|
|
|
|
|
- if (perf_processor_interface == ONYX_INTF)
|
|
|
|
|
|
+ if (perf_processor_interface == ONYX_INTF)
|
|
image_size = PCXU_IMAGE_SIZE;
|
|
image_size = PCXU_IMAGE_SIZE;
|
|
- else if (perf_processor_interface == CUDA_INTF)
|
|
|
|
|
|
+ else if (perf_processor_interface == CUDA_INTF)
|
|
image_size = PCXW_IMAGE_SIZE;
|
|
image_size = PCXW_IMAGE_SIZE;
|
|
- else
|
|
|
|
|
|
+ else
|
|
return -EFAULT;
|
|
return -EFAULT;
|
|
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
@@ -330,22 +330,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
|
|
|
|
|
|
/* First check the machine type is correct for
|
|
/* First check the machine type is correct for
|
|
the requested image */
|
|
the requested image */
|
|
- if (((perf_processor_interface == CUDA_INTF) &&
|
|
|
|
- (interface_type != CUDA_INTF)) ||
|
|
|
|
- ((perf_processor_interface == ONYX_INTF) &&
|
|
|
|
- (interface_type != ONYX_INTF)))
|
|
|
|
|
|
+ if (((perf_processor_interface == CUDA_INTF) &&
|
|
|
|
+ (interface_type != CUDA_INTF)) ||
|
|
|
|
+ ((perf_processor_interface == ONYX_INTF) &&
|
|
|
|
+ (interface_type != ONYX_INTF)))
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
|
|
|
|
/* Next check to make sure the requested image
|
|
/* Next check to make sure the requested image
|
|
is valid */
|
|
is valid */
|
|
- if (((interface_type == CUDA_INTF) &&
|
|
|
|
|
|
+ if (((interface_type == CUDA_INTF) &&
|
|
(test >= MAX_CUDA_IMAGES)) ||
|
|
(test >= MAX_CUDA_IMAGES)) ||
|
|
- ((interface_type == ONYX_INTF) &&
|
|
|
|
- (test >= MAX_ONYX_IMAGES)))
|
|
|
|
|
|
+ ((interface_type == ONYX_INTF) &&
|
|
|
|
+ (test >= MAX_ONYX_IMAGES)))
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
|
|
|
|
/* Copy the image into the processor */
|
|
/* Copy the image into the processor */
|
|
- if (interface_type == CUDA_INTF)
|
|
|
|
|
|
+ if (interface_type == CUDA_INTF)
|
|
return perf_config(cuda_images[test]);
|
|
return perf_config(cuda_images[test]);
|
|
else
|
|
else
|
|
return perf_config(onyx_images[test]);
|
|
return perf_config(onyx_images[test]);
|
|
@@ -359,7 +359,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
|
|
static void perf_patch_images(void)
|
|
static void perf_patch_images(void)
|
|
{
|
|
{
|
|
#if 0 /* FIXME!! */
|
|
#if 0 /* FIXME!! */
|
|
-/*
|
|
|
|
|
|
+/*
|
|
* NOTE: this routine is VERY specific to the current TLB image.
|
|
* NOTE: this routine is VERY specific to the current TLB image.
|
|
* If the image is changed, this routine might also need to be changed.
|
|
* If the image is changed, this routine might also need to be changed.
|
|
*/
|
|
*/
|
|
@@ -367,9 +367,9 @@ static void perf_patch_images(void)
|
|
extern void $i_dtlb_miss_2_0();
|
|
extern void $i_dtlb_miss_2_0();
|
|
extern void PA2_0_iva();
|
|
extern void PA2_0_iva();
|
|
|
|
|
|
- /*
|
|
|
|
|
|
+ /*
|
|
* We can only use the lower 32-bits, the upper 32-bits should be 0
|
|
* We can only use the lower 32-bits, the upper 32-bits should be 0
|
|
- * anyway given this is in the kernel
|
|
|
|
|
|
+ * anyway given this is in the kernel
|
|
*/
|
|
*/
|
|
uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0);
|
|
uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0);
|
|
uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0);
|
|
uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0);
|
|
@@ -377,21 +377,21 @@ static void perf_patch_images(void)
|
|
|
|
|
|
if (perf_processor_interface == ONYX_INTF) {
|
|
if (perf_processor_interface == ONYX_INTF) {
|
|
/* clear last 2 bytes */
|
|
/* clear last 2 bytes */
|
|
- onyx_images[TLBMISS][15] &= 0xffffff00;
|
|
|
|
|
|
+ onyx_images[TLBMISS][15] &= 0xffffff00;
|
|
/* set 2 bytes */
|
|
/* set 2 bytes */
|
|
onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
|
|
onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
|
|
onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
|
|
onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
|
|
onyx_images[TLBMISS][17] = itlb_addr;
|
|
onyx_images[TLBMISS][17] = itlb_addr;
|
|
|
|
|
|
/* clear last 2 bytes */
|
|
/* clear last 2 bytes */
|
|
- onyx_images[TLBHANDMISS][15] &= 0xffffff00;
|
|
|
|
|
|
+ onyx_images[TLBHANDMISS][15] &= 0xffffff00;
|
|
/* set 2 bytes */
|
|
/* set 2 bytes */
|
|
onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
|
|
onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
|
|
onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
|
|
onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
|
|
onyx_images[TLBHANDMISS][17] = itlb_addr;
|
|
onyx_images[TLBHANDMISS][17] = itlb_addr;
|
|
|
|
|
|
/* clear last 2 bytes */
|
|
/* clear last 2 bytes */
|
|
- onyx_images[BIG_CPI][15] &= 0xffffff00;
|
|
|
|
|
|
+ onyx_images[BIG_CPI][15] &= 0xffffff00;
|
|
/* set 2 bytes */
|
|
/* set 2 bytes */
|
|
onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
|
|
onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
|
|
onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
|
|
onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
|
|
@@ -404,24 +404,24 @@ static void perf_patch_images(void)
|
|
|
|
|
|
} else if (perf_processor_interface == CUDA_INTF) {
|
|
} else if (perf_processor_interface == CUDA_INTF) {
|
|
/* Cuda interface */
|
|
/* Cuda interface */
|
|
- cuda_images[TLBMISS][16] =
|
|
|
|
|
|
+ cuda_images[TLBMISS][16] =
|
|
(cuda_images[TLBMISS][16]&0xffff0000) |
|
|
(cuda_images[TLBMISS][16]&0xffff0000) |
|
|
((dtlb_addr >> 8)&0x0000ffff);
|
|
((dtlb_addr >> 8)&0x0000ffff);
|
|
- cuda_images[TLBMISS][17] =
|
|
|
|
|
|
+ cuda_images[TLBMISS][17] =
|
|
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
|
|
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
|
|
cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;
|
|
cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;
|
|
|
|
|
|
- cuda_images[TLBHANDMISS][16] =
|
|
|
|
|
|
+ cuda_images[TLBHANDMISS][16] =
|
|
(cuda_images[TLBHANDMISS][16]&0xffff0000) |
|
|
(cuda_images[TLBHANDMISS][16]&0xffff0000) |
|
|
((dtlb_addr >> 8)&0x0000ffff);
|
|
((dtlb_addr >> 8)&0x0000ffff);
|
|
- cuda_images[TLBHANDMISS][17] =
|
|
|
|
|
|
+ cuda_images[TLBHANDMISS][17] =
|
|
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
|
|
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
|
|
cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
|
|
cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
|
|
|
|
|
|
- cuda_images[BIG_CPI][16] =
|
|
|
|
|
|
+ cuda_images[BIG_CPI][16] =
|
|
(cuda_images[BIG_CPI][16]&0xffff0000) |
|
|
(cuda_images[BIG_CPI][16]&0xffff0000) |
|
|
((dtlb_addr >> 8)&0x0000ffff);
|
|
((dtlb_addr >> 8)&0x0000ffff);
|
|
- cuda_images[BIG_CPI][17] =
|
|
|
|
|
|
+ cuda_images[BIG_CPI][17] =
|
|
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
|
|
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
|
|
cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
|
|
cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
|
|
} else {
|
|
} else {
|
|
@@ -433,7 +433,7 @@ static void perf_patch_images(void)
|
|
|
|
|
|
/*
|
|
/*
|
|
* ioctl routine
|
|
* ioctl routine
|
|
- * All routines effect the processor that they are executed on. Thus you
|
|
|
|
|
|
+ * All routines effect the processor that they are executed on. Thus you
|
|
* must be running on the processor that you wish to change.
|
|
* must be running on the processor that you wish to change.
|
|
*/
|
|
*/
|
|
|
|
|
|
@@ -459,7 +459,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
}
|
|
}
|
|
|
|
|
|
/* copy out the Counters */
|
|
/* copy out the Counters */
|
|
- if (copy_to_user((void __user *)arg, raddr,
|
|
|
|
|
|
+ if (copy_to_user((void __user *)arg, raddr,
|
|
sizeof (raddr)) != 0) {
|
|
sizeof (raddr)) != 0) {
|
|
error = -EFAULT;
|
|
error = -EFAULT;
|
|
break;
|
|
break;
|
|
@@ -487,7 +487,7 @@ static const struct file_operations perf_fops = {
|
|
.open = perf_open,
|
|
.open = perf_open,
|
|
.release = perf_release
|
|
.release = perf_release
|
|
};
|
|
};
|
|
-
|
|
|
|
|
|
+
|
|
static struct miscdevice perf_dev = {
|
|
static struct miscdevice perf_dev = {
|
|
MISC_DYNAMIC_MINOR,
|
|
MISC_DYNAMIC_MINOR,
|
|
PA_PERF_DEV,
|
|
PA_PERF_DEV,
|
|
@@ -595,7 +595,7 @@ static int perf_stop_counters(uint32_t *raddr)
|
|
/* OR sticky2 (bit 1496) to counter2 bit 32 */
|
|
/* OR sticky2 (bit 1496) to counter2 bit 32 */
|
|
tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
|
|
tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
|
|
raddr[2] = (uint32_t)tmp64;
|
|
raddr[2] = (uint32_t)tmp64;
|
|
-
|
|
|
|
|
|
+
|
|
/* Counter3 is bits 1497 to 1528 */
|
|
/* Counter3 is bits 1497 to 1528 */
|
|
tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff;
|
|
tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff;
|
|
/* OR sticky3 (bit 1529) to counter3 bit 32 */
|
|
/* OR sticky3 (bit 1529) to counter3 bit 32 */
|
|
@@ -617,7 +617,7 @@ static int perf_stop_counters(uint32_t *raddr)
|
|
userbuf[22] = 0;
|
|
userbuf[22] = 0;
|
|
userbuf[23] = 0;
|
|
userbuf[23] = 0;
|
|
|
|
|
|
- /*
|
|
|
|
|
|
+ /*
|
|
* Write back the zeroed bytes + the image given
|
|
* Write back the zeroed bytes + the image given
|
|
* the read was destructive.
|
|
* the read was destructive.
|
|
*/
|
|
*/
|
|
@@ -625,13 +625,13 @@ static int perf_stop_counters(uint32_t *raddr)
|
|
} else {
|
|
} else {
|
|
|
|
|
|
/*
|
|
/*
|
|
- * Read RDR-15 which contains the counters and sticky bits
|
|
|
|
|
|
+ * Read RDR-15 which contains the counters and sticky bits
|
|
*/
|
|
*/
|
|
if (!perf_rdr_read_ubuf(15, userbuf)) {
|
|
if (!perf_rdr_read_ubuf(15, userbuf)) {
|
|
return -13;
|
|
return -13;
|
|
}
|
|
}
|
|
|
|
|
|
- /*
|
|
|
|
|
|
+ /*
|
|
* Clear out the counters
|
|
* Clear out the counters
|
|
*/
|
|
*/
|
|
perf_rdr_clear(15);
|
|
perf_rdr_clear(15);
|
|
@@ -644,7 +644,7 @@ static int perf_stop_counters(uint32_t *raddr)
|
|
raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
|
|
raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
|
|
raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
|
|
raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
|
|
}
|
|
}
|
|
-
|
|
|
|
|
|
+
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -682,7 +682,7 @@ static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer)
|
|
i = tentry->num_words;
|
|
i = tentry->num_words;
|
|
while (i--) {
|
|
while (i--) {
|
|
buffer[i] = 0;
|
|
buffer[i] = 0;
|
|
- }
|
|
|
|
|
|
+ }
|
|
|
|
|
|
/* Check for bits an even number of 64 */
|
|
/* Check for bits an even number of 64 */
|
|
if ((xbits = width & 0x03f) != 0) {
|
|
if ((xbits = width & 0x03f) != 0) {
|
|
@@ -808,18 +808,22 @@ static int perf_write_image(uint64_t *memaddr)
|
|
}
|
|
}
|
|
|
|
|
|
runway = ioremap_nocache(cpu_device->hpa.start, 4096);
|
|
runway = ioremap_nocache(cpu_device->hpa.start, 4096);
|
|
|
|
+ if (!runway) {
|
|
|
|
+ pr_err("perf_write_image: ioremap failed!\n");
|
|
|
|
+ return -ENOMEM;
|
|
|
|
+ }
|
|
|
|
|
|
/* Merge intrigue bits into Runway STATUS 0 */
|
|
/* Merge intrigue bits into Runway STATUS 0 */
|
|
tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
|
|
tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
|
|
- __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
|
|
|
|
|
|
+ __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
|
|
runway + RUNWAY_STATUS);
|
|
runway + RUNWAY_STATUS);
|
|
-
|
|
|
|
|
|
+
|
|
/* Write RUNWAY DEBUG registers */
|
|
/* Write RUNWAY DEBUG registers */
|
|
for (i = 0; i < 8; i++) {
|
|
for (i = 0; i < 8; i++) {
|
|
__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
|
|
__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
|
|
}
|
|
}
|
|
|
|
|
|
- return 0;
|
|
|
|
|
|
+ return 0;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -843,7 +847,7 @@ printk("perf_rdr_write\n");
|
|
perf_rdr_shift_out_U(rdr_num, buffer[i]);
|
|
perf_rdr_shift_out_U(rdr_num, buffer[i]);
|
|
} else {
|
|
} else {
|
|
perf_rdr_shift_out_W(rdr_num, buffer[i]);
|
|
perf_rdr_shift_out_W(rdr_num, buffer[i]);
|
|
- }
|
|
|
|
|
|
+ }
|
|
}
|
|
}
|
|
printk("perf_rdr_write done\n");
|
|
printk("perf_rdr_write done\n");
|
|
}
|
|
}
|