Re: arm/sysreg.h use in C

2017-07-13 Thread aa e30
13.7.2017 17.50 "Mark Kettenis"  kirjoitti:

> Date: Thu, 13 Jul 2017 15:49:03 +0300
> From: Artturi Alm 
>
> On Sat, Jul 01, 2017 at 10:53:14AM +0300, Artturi Alm wrote:
> > Hi,
> >
> > just in case i didn't make it clear what it is for, here's diff "fixing"
> > current uses below, compile-tested.
> >
> > -Artturi
> >
>
> Hi,
>
> ping?
> No one up for bikeshedding, or is this seen as useless/worse than handcrafting?
> I think this would alleviate some of the supplementary commenting
> regarding the CP15 reg usage that is currently somewhat of a necessity.

I'm not sure myself if doing something like this is actually an
improvement.


Ok, i'll try to get some fbsd dev to
comment why they never went for
this, just for my own curiosity,
so not pushing the diff any further.

-Artturi


> > diff --git a/sys/arch/arm/arm/cpufunc.c b/sys/arch/arm/arm/cpufunc.c
> > index c91108e7066..fcb56627af7 100644
> > --- a/sys/arch/arm/arm/cpufunc.c
> > +++ b/sys/arch/arm/arm/cpufunc.c
> > @@ -55,6 +55,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >
> >  #if defined(PERFCTRS)
> >  struct arm_pmc_funcs *arm_pmc;
> > @@ -176,8 +177,7 @@ arm_get_cachetype_cp15v7(void)
> > uint32_t sel, level;
> >
> > /* CTR - Cache Type Register */
> > -   __asm volatile("mrc p15, 0, %0, c0, c0, 1"
> > -   : "=r" (ctype));
> > +   __asm volatile("mrc " SR_STR(CP15_CTR(%0)) "\n" : "=r"(ctype));
> >
> > arm_dcache_min_line_size = 1 << (CPU_CT_DMINLINE(ctype) + 2);
> > arm_icache_min_line_size = 1 << (CPU_CT_IMINLINE(ctype) + 2);
> > @@ -185,8 +185,8 @@ arm_get_cachetype_cp15v7(void)
> > min(arm_icache_min_line_size, arm_dcache_min_line_size);
> >
> > /* CLIDR - Cache Level ID Register */
> > -   __asm volatile("mrc p15, 1, %0, c0, c0, 1"
> > -   : "=r" (cache_level_id) :);
> > +   __asm volatile("mrc " SR_STR(CP15_CLIDR(%0))
> > +   : "=r"(cache_level_id));
> > cpu_drain_writebuf();
> >
> > /* L1 Cache available. */
> > @@ -201,17 +201,18 @@ arm_get_cachetype_cp15v7(void)
> > cache_level_id & (0x2 << level)) {
> > sel = level << 1 | 0 << 0; /* L1 | unified/data
cache */
> > /* CSSELR - Cache Size Selection Register */
> > -   __asm volatile("mcr p15, 2, %0, c0, c0, 0"
> > -   :: "r" (sel));
> > +   __asm volatile("mcr " SR_STR(CP15_CSSELR(%0))
"\n"
> > +   :: "r"(sel));
> > cpu_drain_writebuf();
> > /* CCSIDR - Cache Size Identification Register */
> > -   __asm volatile("mrc p15, 1, %0, c0, c0, 0"
> > -   : "=r" (cachereg) :);
> > +   __asm volatile("mcr " SR_STR(CP15_CCSIDR(%0))
"\n"
> > +   : "=r"(cachereg));
> > cpu_drain_writebuf();
> > sets = ((cachereg >> 13) & 0x7fff) + 1;
> > arm_pdcache_line_size = 1 << ((cachereg & 0x7) + 4);
> > arm_pdcache_ways = ((cachereg >> 3) & 0x3ff) + 1;
> > -   arm_pdcache_size = arm_pdcache_line_size *
arm_pdcache_ways * sets;
> > +   arm_pdcache_size =
> > +   arm_pdcache_line_size * arm_pdcache_ways * sets;
> > switch (cachereg & 0xc0000000) {
> > case 0x00000000:
> > arm_pcache_type = 0;
> > @@ -230,24 +231,26 @@ arm_get_cachetype_cp15v7(void)
> > if (cache_level_id & (0x1 << level)) {
> > sel = level << 1 | 1 << 0; /* L1 | instruction
cache */
> > /* CSSELR - Cache Size Selection Register */
> > -   __asm volatile("mcr p15, 2, %0, c0, c0, 0"
> > -   :: "r" (sel));
> > +   __asm volatile("mcr " SR_STR(CP15_CSSELR(%0))
"\n"
> > +   :: "r"(sel));
> > cpu_drain_writebuf();
> > /* CCSIDR - Cache Size Identification Register */
> > -   __asm volatile("mrc p15, 1, %0, c0, c0, 0"
> > -   : "=r" (cachereg) :);
> > +   __asm volatile("mcr " SR_STR(CP15_CCSIDR(%0))
"\n"
> > +   : "=r"(cachereg));
> > cpu_drain_writebuf();
> > sets = ((cachereg >> 13) & 0x7fff) + 1;
> > arm_picache_line_size = 1 << ((cachereg & 0x7) + 4);
> > arm_picache_ways = ((cachereg >> 3) & 0x3ff) + 1;
> > -   arm_picache_size = arm_picache_line_size *
arm_picache_ways * sets;
> > +   arm_picache_size =
> > +   arm_picache_line_size * arm_picache_ways * sets;
> > }
> > }
> >
> > arm_dcache_align = arm_pdcache_line_size;
> > arm_dcache_align_mask = arm_dcache_align - 1;
> >
> > - 

Re: arm quadruple bus space

2017-04-24 Thread aa e30
24.4.2017 22.54 "Mark Kettenis"  kirjoitti:

On armv7 and arm64 we have this dirty hack to be able to use the
com(4) driver on hardware blocks that have the registers spaced
differently than the original NS8250/16450/16550.  One day I'm going
to fix this properly in com(4) itself, but not today.  Now on the
Rockchip RK3288, the Synopsys DesignWare UART block is implemented in
a way that only allows word-sized access.  Byte-sized writes have no
effect, and as a result the serial console remains silent.

The diff below changes the "quadruple" bus space access methods to do
word-sized access.  This seems to work fine on an Allwinner H3 and
Raspberry Pi 3.  It would be good if someone could test this on a
BeagleBone Black or some other TI board.

ok?


Index: arch/arm/armv7/armv7_a4x_io.S
===
RCS file: /cvs/src/sys/arch/arm/armv7/armv7_a4x_io.S,v
retrieving revision 1.1
diff -u -p -r1.1 armv7_a4x_io.S
--- arch/arm/armv7/armv7_a4x_io.S   8 May 2009 02:57:32 -   1.1
+++ arch/arm/armv7/armv7_a4x_io.S   24 Apr 2017 18:56:08 -
@@ -50,12 +50,11 @@
  */

 ENTRY(a4x_bs_r_1)
-   ldrb    r0, [r1, r2, LSL #2]
+   ldr r0, [r1, r2, LSL #2]
mov pc, lr

 ENTRY(a4x_bs_r_2)
-   mov r2, r2, LSL #2
-   ldrh    r0, [r1, r2]
+   ldr r0, [r1, r2, LSL #2]
mov pc, lr

 ENTRY(a4x_bs_r_4)
@@ -67,12 +66,11 @@ ENTRY(a4x_bs_r_4)
  */

 ENTRY(a4x_bs_w_1)
-   strb    r3, [r1, r2, LSL #2]
+   str r3, [r1, r2, LSL #2]
mov pc, lr

 ENTRY(a4x_bs_w_2)
-   mov r2, r2, LSL #2
-   strh    r3, [r1, r2]
+   str r3, [r1, r2, LSL #2]
mov pc, lr

 ENTRY(a4x_bs_w_4)
Index: arch/arm64/dev/arm64_bus_space.c
===
RCS file: /cvs/src/sys/arch/arm64/dev/arm64_bus_space.c,v
retrieving revision 1.3
diff -u -p -r1.3 arm64_bus_space.c
--- arch/arm64/dev/arm64_bus_space.c17 Feb 2017 19:20:22 -  1.3
+++ arch/arm64/dev/arm64_bus_space.c24 Apr 2017 18:56:08 -
@@ -262,13 +262,13 @@ generic_space_vaddr(bus_space_tag_t t, b
 uint8_t
 a4x_space_read_1(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o)
 {
-   return *(volatile uint8_t *)(h + (o*4));
+   return *(volatile uint32_t *)(h + (o*4));
 }

 uint16_t
 a4x_space_read_2(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o)
 {
-   return *(volatile uint16_t *)(h + (o*4));
+   return *(volatile uint32_t *)(h + (o*4));
 }

 uint32_t
@@ -287,14 +287,14 @@ void
 a4x_space_write_1(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o,
 uint8_t v)
 {
-   *(volatile uint8_t *)(h + (o*4)) = v;
+   *(volatile uint32_t *)(h + (o*4)) = v;
 }

 void
 a4x_space_write_2(bus_space_tag_t t, bus_space_handle_t h, bus_size_t o,
 uint16_t v)
 {
-   *(volatile uint16_t *)(h + (o*4)) = v;
+   *(volatile uint32_t *)(h + (o*4)) = v;
 }

 void


FWIW (not much): I used to run with a diff like the one above and had no
problems; maybe the byte access was for xscale or something I never tested with.

-aalm