ospf6d: use ROUTE_FLAGFILTER
Like ospfd, ospf6d can use ROUTE_FLAGFILTER to opt out of receiving messages relating to L2 and broadcast routes on its routing socket. We've been running this for a week or so with no problems. ok? Index: kroute.c === RCS file: /cvs/src/usr.sbin/ospf6d/kroute.c,v retrieving revision 1.64 diff -u -p -u -p -r1.64 kroute.c --- kroute.c17 May 2020 18:29:25 - 1.64 +++ kroute.c18 Aug 2020 11:56:09 - @@ -102,6 +102,7 @@ kr_init(int fs, u_int rdomain, int redis int opt = 0, rcvbuf, default_rcvbuf; socklen_t optlen; int filter_prio = fib_prio; + int filter_flags = RTF_LLINFO | RTF_BROADCAST; kr_state.fib_sync = fs; kr_state.rdomain = rdomain; @@ -127,6 +128,12 @@ kr_init(int fs, u_int rdomain, int redis if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_PRIOFILTER, _prio, sizeof(filter_prio)) == -1) { log_warn("%s: setsockopt AF_ROUTE ROUTE_PRIOFILTER", __func__); + /* not fatal */ + } + + if (setsockopt(kr_state.fd, AF_ROUTE, ROUTE_FLAGFILTER, _flags, + sizeof(filter_flags)) == -1) { + log_warn("%s: setsockopt AF_ROUTE ROUTE_FLAGFILTER", __func__); /* not fatal */ }
Re: [PATCH] Add common PCIE capability list
On Tue, Sep 01, 2020 at 11:44:03PM -0500, Jordan Hargrave wrote: > This patch adds a common function for scanning PCIE Express Capability list > The PCIE Capability list starts at 0x100 in extended PCI configuration space. This seems to only handle extended capabilities? Something like pcie_get_ext_capability() would be a better name. It is 'PCI Express' not 'PCIExpress' 'ofs & 3' test doesn't make sense when PCI_PCIE_ECAP_NEXT() always masks those bits. > > --- > sys/dev/pci/pci.c| 28 > sys/dev/pci/pcivar.h | 2 ++ > 2 files changed, 30 insertions(+) > > diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c > index bf75f875e..8f9a5ef7a 100644 > --- a/sys/dev/pci/pci.c > +++ b/sys/dev/pci/pci.c > @@ -677,6 +677,34 @@ pci_get_ht_capability(pci_chipset_tag_t pc, pcitag_t > tag, int capid, > return (0); > } > > +int > +pcie_get_capability(pci_chipset_tag_t pc, pcitag_t tag, int capid, > +int *offset, pcireg_t *value) > +{ > + pcireg_t reg; > + unsigned int ofs; > + > + /* Make sure we support PCIExpress device */ > + if (pci_get_capability(pc, tag, PCI_CAP_PCIEXPRESS, NULL, NULL) == 0) > + return (0); > + /* Scan PCIExpress capabilities */ > + ofs = PCI_PCIE_ECAP; > + while (ofs != 0) { > + if ((ofs & 3) || (ofs < PCI_PCIE_ECAP)) > + return (0); > + reg = pci_conf_read(pc, tag, ofs); > + if (PCI_PCIE_ECAP_ID(reg) == capid) { > + if (offset) > + *offset = ofs; > + if (value) > + *value = reg; > + return (1); > + } > + ofs = PCI_PCIE_ECAP_NEXT(reg); > + } > + return (0); > +} > + > uint16_t > pci_requester_id(pci_chipset_tag_t pc, pcitag_t tag) > { > diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h > index bdfe0404f..0376ba992 100644 > --- a/sys/dev/pci/pcivar.h > +++ b/sys/dev/pci/pcivar.h > @@ -233,6 +233,8 @@ int pci_io_find(pci_chipset_tag_t, pcitag_t, int, > bus_addr_t *, > int pci_mem_find(pci_chipset_tag_t, pcitag_t, int, bus_addr_t *, > bus_size_t *, int *); > > +int pcie_get_capability(pci_chipset_tag_t, pcitag_t, int, > + int *, pcireg_t *); > int pci_get_capability(pci_chipset_tag_t, pcitag_t, int, > int *, pcireg_t *); > int pci_get_ht_capability(pci_chipset_tag_t, pcitag_t, int, > -- > 2.26.2 > >
Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi
Oh good catch thanks. Weird, it does compile! From: Daniel Dickman Sent: Tuesday, September 1, 2020 11:23 PM To: Jordan Hargrave Cc: tech@openbsd.org Subject: Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi > [PATCH] Add IOMMU support for Intel VT-d and AMD Vi > > This hooks each pci device and overrides bus_dmamap_xxx to issue > remap of DMA requests to virtual DMA space. It protects devices > from issuing I/O requests to memory in the system that is outside > the requested DMA space. Hi Jordan, thanks for working on this. I would like to see iommu support... > + uint64_tefr; > + uint8_t reserved[8]; > +} __packd; ...that being said, is the above a typo?
[PATCH] Add common PCIE capability list
This patch adds a common function for scanning PCIE Express Capability list The PCIE Capability list starts at 0x100 in extended PCI configuration space. --- sys/dev/pci/pci.c| 28 sys/dev/pci/pcivar.h | 2 ++ 2 files changed, 30 insertions(+) diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index bf75f875e..8f9a5ef7a 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -677,6 +677,34 @@ pci_get_ht_capability(pci_chipset_tag_t pc, pcitag_t tag, int capid, return (0); } +int +pcie_get_capability(pci_chipset_tag_t pc, pcitag_t tag, int capid, +int *offset, pcireg_t *value) +{ + pcireg_t reg; + unsigned int ofs; + + /* Make sure we support PCIExpress device */ + if (pci_get_capability(pc, tag, PCI_CAP_PCIEXPRESS, NULL, NULL) == 0) + return (0); + /* Scan PCIExpress capabilities */ + ofs = PCI_PCIE_ECAP; + while (ofs != 0) { + if ((ofs & 3) || (ofs < PCI_PCIE_ECAP)) + return (0); + reg = pci_conf_read(pc, tag, ofs); + if (PCI_PCIE_ECAP_ID(reg) == capid) { + if (offset) + *offset = ofs; + if (value) + *value = reg; + return (1); + } + ofs = PCI_PCIE_ECAP_NEXT(reg); + } + return (0); +} + uint16_t pci_requester_id(pci_chipset_tag_t pc, pcitag_t tag) { diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h index bdfe0404f..0376ba992 100644 --- a/sys/dev/pci/pcivar.h +++ b/sys/dev/pci/pcivar.h @@ -233,6 +233,8 @@ int pci_io_find(pci_chipset_tag_t, pcitag_t, int, bus_addr_t *, intpci_mem_find(pci_chipset_tag_t, pcitag_t, int, bus_addr_t *, bus_size_t *, int *); +intpcie_get_capability(pci_chipset_tag_t, pcitag_t, int, + int *, pcireg_t *); intpci_get_capability(pci_chipset_tag_t, pcitag_t, int, int *, pcireg_t *); intpci_get_ht_capability(pci_chipset_tag_t, pcitag_t, int, -- 2.26.2
Re: [PATCH] Add IOMMU support for Intel VT-d and AMD-Vi
> [PATCH] Add IOMMU support for Intel VT-d and AMD Vi > > This hooks each pci device and overrides bus_dmamap_xxx to issue > remap of DMA requests to virtual DMA space. It protects devices > from issuing I/O requests to memory in the system that is outside > the requested DMA space. Hi Jordan, thanks for working on this. I would like to see iommu support... > + uint64_tefr; > + uint8_t reserved[8]; > +} __packd; ...that being said, is the above a typo?
Re: [macppc] clang-10: issue with ppc dag to dag pattern instruction selection
Moving from bugs to tech. cwen reported that base-clang crashed on macppc in graphics/babl and emulators/mednafen [1]. I observed that clang crashed on powerpc64 in mednafen. I now propose to backport a commit in llvm 11.x git [2] to prevent these crashes. This change affects other arches. [1] https://marc.info/?l=openbsd-bugs=159775378014683=2 [2] https://github.com/llvm/llvm-project/commit/d4ce862 FreeBSD on powerpc64 found the problems earlier: https://bugs.llvm.org/show_bug.cgi?id=45237 (babl) https://bugs.llvm.org/show_bug.cgi?id=45274 (mednafen) The problem is with clang -fno-unsafe-math-optimizations, one of the new floating-point options in clang-10. The diff disables the new options on all targets except X86 (and SystemZ), so it also affects arches other than PowerPC. The disabled options cause a warning. With the diff in clang: - mednafen stops using -fno-unsafe-math-optimizations, because the new warning fails the configure test. I can build and package mednafen on macppc (where a single *m68k*.cpp takes over 2 hours), but when I try to play Super Mario World (snes), mednafen crashes with a SIGSEGV. The build on powerpc64 stops crashing clang, but fails at an altivec error. - babl uses -fno-unsafe-math-optimizations and ignores the new warning. I can build and package babl, and all 27 tests pass on both macppc and powerpc64. - I have built src and xenocara on powerpc64. The diff is in llvm 11.x. To backport the diff, I manually applied some parts (where the context had changed), and I renamed RoundingMode::NearestTiesToEven back to FPR_RoundToNearest. Apply the diff in /usr/src/gnu/llvm. If DiagnosticFrontendKinds.inc exists under /usr/obj/gnu/usr.bin/clang/, you must delete the .inc, because there is a missing dependency in our Makefiles. Is this OK? Should we do something else? --George Index: clang/docs/ClangCommandLineReference.rst === RCS file: /cvs/src/gnu/llvm/clang/docs/ClangCommandLineReference.rst,v retrieving revision 1.1.1.2 diff -u -p -r1.1.1.2 ClangCommandLineReference.rst --- clang/docs/ClangCommandLineReference.rst9 Aug 2020 15:51:07 - 1.1.1.2 +++ clang/docs/ClangCommandLineReference.rst30 Aug 2020 19:37:51 - @@ -818,6 +818,10 @@ Enables the experimental global instruct Enables an experimental new pass manager in LLVM. +.. option:: -fexperimental-strict-floating-point + +Enables the use of non-default rounding modes and non-default exception handling on targets that are not currently ready. + .. option:: -ffine-grained-bitfield-accesses, -fno-fine-grained-bitfield-accesses Use separate accesses for consecutive bitfield runs with legal widths and alignments. Index: clang/include/clang/Basic/CodeGenOptions.def === RCS file: /cvs/src/gnu/llvm/clang/include/clang/Basic/CodeGenOptions.def,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 CodeGenOptions.def --- clang/include/clang/Basic/CodeGenOptions.def3 Aug 2020 14:31:32 - 1.1.1.1 +++ clang/include/clang/Basic/CodeGenOptions.def30 Aug 2020 19:37:51 - @@ -58,6 +58,8 @@ CODEGENOPT(DisableLLVMPasses , 1, 0) /// ///< frontend. CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers CODEGENOPT(DisableO0ImplyOptNone , 1, 0) ///< Don't annonate function with optnone at O0 +CODEGENOPT(ExperimentalStrictFloatingPoint, 1, 0) ///< Enables the new, experimental + ///< strict floating point. CODEGENOPT(ExperimentalNewPassManager, 1, 0) ///< Enables the new, experimental ///< pass manager. CODEGENOPT(DebugPassManager, 1, 0) ///< Prints debug information for the new Index: clang/include/clang/Basic/DiagnosticFrontendKinds.td === RCS file: /cvs/src/gnu/llvm/clang/include/clang/Basic/DiagnosticFrontendKinds.td,v retrieving revision 1.1.1.1 diff -u -p -r1.1.1.1 DiagnosticFrontendKinds.td --- clang/include/clang/Basic/DiagnosticFrontendKinds.td3 Aug 2020 14:31:32 - 1.1.1.1 +++ clang/include/clang/Basic/DiagnosticFrontendKinds.td30 Aug 2020 19:37:51 - @@ -37,6 +37,12 @@ def note_fe_backend_plugin: Note<"%0">, def warn_fe_override_module : Warning< "overriding the module target triple with %0">, InGroup>; +def warn_fe_backend_unsupported_fp_rounding : Warning< +"overriding currently unsupported rounding mode on this target">, +InGroup; +def warn_fe_backend_unsupported_fp_exceptions : Warning< +"overriding currently unsupported use of floating point exceptions " +"on this target">, InGroup; def remark_fe_backend_optimization_remark : Remark<"%0">, BackendInfo, InGroup; Index: clang/include/clang/Basic/DiagnosticGroups.td
[PATCH] Add IOMMU support for Intel VT-d and AMD-Vi
[PATCH] Add IOMMU support for Intel VT-d and AMD Vi This hooks each pci device and overrides bus_dmamap_xxx to issue remap of DMA requests to virtual DMA space. It protects devices from issuing I/O requests to memory in the system that is outside the requested DMA space. --- sys/arch/amd64/conf/GENERIC |1 + sys/arch/amd64/conf/RAMDISK |1 + sys/arch/amd64/conf/RAMDISK_CD |1 + sys/arch/amd64/include/pci_machdep.h |3 +- sys/arch/amd64/pci/pci_machdep.c | 15 +- sys/dev/acpi/acpi.c |5 + sys/dev/acpi/acpidmar.c | 2988 ++ sys/dev/acpi/acpidmar.h | 534 + sys/dev/acpi/acpireg.h | 21 +- sys/dev/acpi/amd_iommu.h | 358 +++ sys/dev/acpi/files.acpi |5 + sys/dev/pci/pci.c| 28 + sys/dev/pci/pcivar.h |2 + 13 files changed, 3959 insertions(+), 3 deletions(-) create mode 100644 sys/dev/acpi/acpidmar.c create mode 100644 sys/dev/acpi/acpidmar.h create mode 100644 sys/dev/acpi/amd_iommu.h diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC index 2c49f91a1..1eda12bc9 100644 --- a/sys/arch/amd64/conf/GENERIC +++ b/sys/arch/amd64/conf/GENERIC @@ -45,6 +45,7 @@ acpibtn* at acpi? acpicpu* at acpi? acpicmos* at acpi? acpidock* at acpi? +acpidmar0 at acpi? acpiec*at acpi? acpipci* at acpi? acpiprt* at acpi? diff --git a/sys/arch/amd64/conf/RAMDISK b/sys/arch/amd64/conf/RAMDISK index 10148add1..7ab48f32e 100644 --- a/sys/arch/amd64/conf/RAMDISK +++ b/sys/arch/amd64/conf/RAMDISK @@ -34,6 +34,7 @@ acpipci* at acpi? acpiprt* at acpi? acpimadt0 at acpi? #acpitz* at acpi? +acpidmar* at acpi? disable mpbios0at bios0 diff --git a/sys/arch/amd64/conf/RAMDISK_CD b/sys/arch/amd64/conf/RAMDISK_CD index 91022751e..82a24e210 100644 --- a/sys/arch/amd64/conf/RAMDISK_CD +++ b/sys/arch/amd64/conf/RAMDISK_CD @@ -48,6 +48,7 @@ sdhc* at acpi? acpihve* at acpi? chvgpio*at acpi? glkgpio* at acpi? +acpidmar* at acpi? disable mpbios0at bios0 diff --git a/sys/arch/amd64/include/pci_machdep.h b/sys/arch/amd64/include/pci_machdep.h index bc295cc22..c725bdc73 100644 --- a/sys/arch/amd64/include/pci_machdep.h +++ b/sys/arch/amd64/include/pci_machdep.h @@ -91,7 +91,8 @@ void *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t, int, struct cpu_info *, int (*)(void *), void *, const char *); void pci_intr_disestablish(pci_chipset_tag_t, void *); -#definepci_probe_device_hook(c, a) (0) +intpci_probe_device_hook(pci_chipset_tag_t, + struct pci_attach_args *); void pci_dev_postattach(struct device *, struct pci_attach_args *); diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c index cf4e835de..b700946a4 100644 --- a/sys/arch/amd64/pci/pci_machdep.c +++ b/sys/arch/amd64/pci/pci_machdep.c @@ -89,6 +89,11 @@ #include #endif +#include "acpi.h" +#if NACPI > 0 +#include +#endif + /* * Memory Mapped Configuration space access. * @@ -797,7 +802,15 @@ pci_init_extents(void) } } -#include "acpi.h" +int +pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa) +{ +#if NACPI > 0 + acpidmar_pci_hook(pc, pa); +#endif + return 0; +} + #if NACPI > 0 void acpi_pci_match(struct device *, struct pci_attach_args *); pcireg_t acpi_pci_min_powerstate(pci_chipset_tag_t, pcitag_t); diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c index a6239198e..ea11483ad 100644 --- a/sys/dev/acpi/acpi.c +++ b/sys/dev/acpi/acpi.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -2448,6 +2449,8 @@ acpi_sleep_pm(struct acpi_softc *sc, int state) sc->sc_fadt->pm2_cnt_blk && sc->sc_fadt->pm2_cnt_len) acpi_write_pmreg(sc, ACPIREG_PM2_CNT, 0, ACPI_PM2_ARB_DIS); + acpidmar_sw(DVACT_SUSPEND); + /* Write SLP_TYPx values */ rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0); regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0); @@ -2483,6 +2486,8 @@ acpi_resume_pm(struct acpi_softc *sc, int fromstate) { uint16_t rega, regb, en; + acpidmar_sw(DVACT_RESUME); + /* Write SLP_TYPx values */ rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0); regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0); diff --git a/sys/dev/acpi/acpidmar.c b/sys/dev/acpi/acpidmar.c new file mode 100644 index 0..48506e1b1 --- /dev/null +++ b/sys/dev/acpi/acpidmar.c @@ -0,0 +1,2988 @@ +/* + * Copyright (c) 2015 Jordan Hargrave + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright
Re: m_defrag(9) leak
On Mon, Aug 31, 2020 at 08:50:09AM +0200, Theo Buehler wrote: > On Tue, Aug 25, 2020 at 03:28:03PM +0200, Claudio Jeker wrote: > > On Tue, Aug 25, 2020 at 08:38:06PM +1000, Matt Dunwoodie wrote: > > > On Tue, 25 Aug 2020 08:54:10 +0200 > > > Claudio Jeker wrote: > > > > > > > On Tue, Aug 25, 2020 at 08:42:47AM +0200, Martin Pieuchot wrote: > > > > > Maxime Villard mentioned a leak due to a missing m_freem() in wg(4): > > > > > https://marc.info/?l=netbsd-tech-net=159827988018641=2 > > > > > > > > > > It seems to be that such leak is present in other uses of > > > > > m_defrag() in the tree. I won't take the time to go though all of > > > > > them, but an audit would be welcome. > > > > > > > > Why does wg(4) needs m_defrag? and why is it used in this way? It > > > > does not even check if m_defrag() is actually needed. > > > > > > > > m_defrag() should be used by HW drivers where DMA constraints require > > > > the mbuf to be flattened. Software dirvers should use m_pullup / > > > > m_pulldown etc instead. > > > > > > wg(4) uses m_defrag to bring all mbuf fragments into one, so it can > > > read handshake values, and decrypt data directly from the mbuf. In > > > both cases it needs the full length of the packet. > > > > For the handshake headers m_pullup() or m_pulldown() is normally the way > > to go. Doing a massive pullup to decrypt data is a poor design. You work > > against the network stack here and this comes at a reasonably high price > > (considering you need to allocate a new mbuf, mbuf cluster and copy all > > data over). > > > > > However you're right, it doesn't check if it needs defrag and yes > > > m_pullup is preferable. I have a new patch for m_pullup and includes a > > > comment on why it's done. Thoughts? > > > > > > There is one other case of "m_pullup(m, m->m_pkthdr.len)" in sppp_input. > > > > sppp(4) is not without its own flaws. That code was added because of > > alignment issues. It may actually no longer be needed but I have not the > > time right now to really think this through. > > > > > (To avoid any confusion with the previous patch, there is no need to > > > m_free(m) as m_pullup will do that for us.) > > > > > diff --git if_wg.c if_wg.c > > > index e5a1071ccf1..242bd105e72 100644 > > > --- if_wg.c > > > +++ if_wg.c > > > @@ -2022,7 +2022,13 @@ wg_input(void *_sc, struct mbuf *m, struct ip *ip, > > > struct ip6_hdr *ip6, > > > /* m has a IP/IPv6 header of hlen length, we don't need it anymore. */ > > > m_adj(m, hlen); > > > > > > - if (m_defrag(m, M_NOWAIT) != 0) > > > + /* > > > + * Ensure mbuf is contiguous over full length of packet. This is done > > > + * so we can directly read the handshake values in wg_handshake, and so > > > + * we can decrypt a transport packet by passing a single buffer to > > > + * noise_remote_decrypt in wg_decap. > > > + */ > > > + if ((m = m_pullup(m, m->m_pkthdr.len)) == NULL) > > > return NULL; > > > > > > if ((m->m_pkthdr.len == sizeof(struct wg_pkt_initiation) && > > > > I'm fine with this or the m_defrag fix to go in now to fix the mbuf leak. > > Could someone who understands this please commit either version of the > diff? I committed this diff now. Thanks
wg: count peers per interface not globally
The driver increases a static peer counter across all wg interfaces when creating peers, the peer number is only used in debug output, though. Output from console around recreating an interface (2 and 4 are the same): wg1: Receiving handshake response from peer 2 wg1: Receiving keepalive packet from peer 2 wg1: Sending keepalive packet to peer 2 # ifconfig wg0 destroy wg1: Peer 1 destroyed wg1: Peer 2 destroyed wg1: Destroyed interface # sh /etc/netstart wg0 wg1: Sending handshake initiation to peer 4 wg1: Receiving handshake response from peer 4 wg1: Receiving keepalive packet from peer 4 wg1: Sending keepalive packet to peer 4 I'm looking into an issue caused by having multiple `wgpeer's defined on a single wg(4) interface and debug log is helpful, but having to count along is not helpful, so I'd like to replace the global counter with an interface specific one that's already in place. There's another completely unused global counter which I remove as well in the diff below. Recreating as above now shows this: # ifconfig wg1 destroy wg1: Peer 0 destroyed wg1: Peer 1 destroyed wg1: Destroyed interface # sh /etc/netstart wg0 wg1: Peer 0 created wg1: Sending handshake initiation to peer 0 wg1: Receiving handshake response from peer 0 wg1: Peer 1 created wg1: Receiving keepalive packet from peer 0 Feedback? OK? Index: if_wg.c === RCS file: /cvs/src/sys/net/if_wg.c,v retrieving revision 1.13 diff -u -p -r1.13 if_wg.c --- if_wg.c 27 Aug 2020 21:27:17 - 1.13 +++ if_wg.c 1 Sep 2020 18:35:41 - @@ -370,8 +370,6 @@ int wg_clone_create(struct if_clone *, i intwg_clone_destroy(struct ifnet *); void wgattach(int); -uint64_t peer_counter = 0; -uint64_t keypair_counter = 0; struct poolwg_aip_pool; struct poolwg_peer_pool; struct poolwg_ratelimit_pool; @@ -398,7 +396,6 @@ wg_peer_create(struct wg_softc *sc, uint if ((peer = pool_get(_peer_pool, PR_NOWAIT)) == NULL) return NULL; - peer->p_id = peer_counter++; peer->p_sc = sc; noise_remote_init(>p_remote, public, >sc_local); @@ -442,7 +439,7 @@ wg_peer_create(struct wg_softc *sc, uint rw_enter_write(>sc_peer_lock); LIST_INSERT_HEAD(>sc_peer[idx], peer, p_pubkey_entry); TAILQ_INSERT_TAIL(>sc_peer_seq, peer, p_seq_entry); - sc->sc_peer_num++; + peer->p_id = sc->sc_peer_num++; rw_exit_write(>sc_peer_lock); DPRINTF(sc, "Peer %llu created\n", peer->p_id);
Re: amd64: calibrate lapic timer frequency in constant time
On Tue, Sep 01, 2020 at 06:14:05PM +0200, Mark Kettenis wrote: > > Date: Tue, 1 Sep 2020 11:05:26 -0500 > > From: Scott Cheloha > > > > Hi, > > > > At boot, if we don't know the lapic frequency offhand we compute it by > > waiting for a known clock (the i8254) with a known frequency to cycle > > a few times. > > > > Currently we cycle hz times. This doesn't make sense. There is > > little to no benefit to waiting additional cycles if your kernel is > > compiled with a larger HZ. Mostly it just makes the calibration take > > longer. > > > > Consider the common HZ=1000 case. What is the benefit of looping an > > additional 900 times? The point of diminishing returns is well under > > 1000 loops. > > > > 20-50 loops is probably sufficient to limit our error, but I don't > > want to break anything so let's use 100, like we do on default > > kernels. > > > > ok? > > Sorry, but this makes no sense to me. The current code waits for 1 > second regarless of what HZ is. > > And I expect that the accuracy of the measurement depends on the total > number elapsed time, so I expect a less acurate results if you only > wait 100 cycles at HZ=1000 (which is 0.1 second). Whoops. Yes, you're right, I'm having a slow moment. Nevermind that last patch. But this gives me an idea. Given that we're waiting the same amount of time (1 second) no matter the value of hz, and we're using the same clock to do the wait... why are we fussing with the inner workings of reading the i8254? We have a function for this: i8254_delay(). If we just use that and tell it to wait a million microseconds we can throw out a bunch of code. Index: lapic.c === RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v retrieving revision 1.55 diff -u -p -r1.55 lapic.c --- lapic.c 3 Aug 2019 14:57:51 - 1.55 +++ lapic.c 1 Sep 2020 16:40:39 - @@ -451,24 +451,6 @@ lapic_initclocks(void) i8254_inittimecounter_simple(); } - -extern int gettick(void); /* XXX put in header file */ -extern u_long rtclock_tval; /* XXX put in header file */ - -static __inline void -wait_next_cycle(void) -{ - unsigned int tick, tlast; - - tlast = (1 << 16); /* i8254 counter has 16 bits at most */ - for (;;) { - tick = gettick(); - if (tick > tlast) - return; - tlast = tick; - } -} - /* * Calibrate the local apic count-down timer (which is running at * bus-clock speed) vs. the i8254 counter/timer (which is running at @@ -484,7 +466,7 @@ void lapic_calibrate_timer(struct cpu_info *ci) { unsigned int startapic, endapic; - u_int64_t dtick, dapic, tmp; + u_int64_t tmp; u_long s; int i; @@ -504,29 +486,13 @@ lapic_calibrate_timer(struct cpu_info *c s = intr_disable(); - /* wait for current cycle to finish */ - wait_next_cycle(); - startapic = lapic_gettick(); - - /* wait the next hz cycles */ - for (i = 0; i < hz; i++) - wait_next_cycle(); - + i8254_delay(100); endapic = lapic_gettick(); intr_restore(s); - dtick = hz * rtclock_tval; - dapic = startapic-endapic; - - /* -* there are TIMER_FREQ ticks per second. -* in dtick ticks, there are dapic bus clocks. -*/ - tmp = (TIMER_FREQ * dapic) / dtick; - - lapic_per_second = tmp; + lapic_per_second = startapic - endapic; skip_calibration: printf("%s: apic clock running at %dMHz\n",
Re: amd64: calibrate lapic timer frequency in constant time
> Date: Tue, 1 Sep 2020 11:05:26 -0500 > From: Scott Cheloha > > Hi, > > At boot, if we don't know the lapic frequency offhand we compute it by > waiting for a known clock (the i8254) with a known frequency to cycle > a few times. > > Currently we cycle hz times. This doesn't make sense. There is > little to no benefit to waiting additional cycles if your kernel is > compiled with a larger HZ. Mostly it just makes the calibration take > longer. > > Consider the common HZ=1000 case. What is the benefit of looping an > additional 900 times? The point of diminishing returns is well under > 1000 loops. > > 20-50 loops is probably sufficient to limit our error, but I don't > want to break anything so let's use 100, like we do on default > kernels. > > ok? Sorry, but this makes no sense to me. The current code waits for 1 second regarless of what HZ is. And I expect that the accuracy of the measurement depends on the total number elapsed time, so I expect a less acurate results if you only wait 100 cycles at HZ=1000 (which is 0.1 second). > Index: lapic.c > === > RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v > retrieving revision 1.55 > diff -u -p -r1.55 lapic.c > --- lapic.c 3 Aug 2019 14:57:51 - 1.55 > +++ lapic.c 1 Sep 2020 15:58:41 - > @@ -509,15 +509,15 @@ lapic_calibrate_timer(struct cpu_info *c > > startapic = lapic_gettick(); > > - /* wait the next hz cycles */ > - for (i = 0; i < hz; i++) > + /* wait a few cycles */ > + for (i = 0; i < 100; i++) > wait_next_cycle(); > > endapic = lapic_gettick(); > > intr_restore(s); > > - dtick = hz * rtclock_tval; > + dtick = 100 * rtclock_tval; > dapic = startapic-endapic; > > /* > >
amd64: calibrate lapic timer frequency in constant time
Hi, At boot, if we don't know the lapic frequency offhand we compute it by waiting for a known clock (the i8254) with a known frequency to cycle a few times. Currently we cycle hz times. This doesn't make sense. There is little to no benefit to waiting additional cycles if your kernel is compiled with a larger HZ. Mostly it just makes the calibration take longer. Consider the common HZ=1000 case. What is the benefit of looping an additional 900 times? The point of diminishing returns is well under 1000 loops. 20-50 loops is probably sufficient to limit our error, but I don't want to break anything so let's use 100, like we do on default kernels. ok? Index: lapic.c === RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v retrieving revision 1.55 diff -u -p -r1.55 lapic.c --- lapic.c 3 Aug 2019 14:57:51 - 1.55 +++ lapic.c 1 Sep 2020 15:58:41 - @@ -509,15 +509,15 @@ lapic_calibrate_timer(struct cpu_info *c startapic = lapic_gettick(); - /* wait the next hz cycles */ - for (i = 0; i < hz; i++) + /* wait a few cycles */ + for (i = 0; i < 100; i++) wait_next_cycle(); endapic = lapic_gettick(); intr_restore(s); - dtick = hz * rtclock_tval; + dtick = 100 * rtclock_tval; dapic = startapic-endapic; /*
Re: getitimer(2), setitimer(2): merge critical sections
On Mon, Aug 17, 2020 at 05:55:34PM -0500, Scott Cheloha wrote: > > [...] Two week bump. In summary: - Merge the critical sections so that "timer swap" with setitimer(2) is atomic. - To do this, move error-free operations into a common kernel subroutine, setitimer(). Now we have one critical section. - Leave error-prone operations in sys_getitimer() and sys_setitimer(). In order to make the "timer swap" atomic we leave the timer installed if the copyout(9) fails. This isn't great, but it is unavoidable without permitting copyout(9) from within a mutex. FreeBSD and Linux went this direction, too. I would rather leave the timer running and have an atomic swap than race the hardclock(9) or the realitexpire() timeout. ok? Index: kern_time.c === RCS file: /cvs/src/sys/kern/kern_time.c,v retrieving revision 1.140 diff -u -p -r1.140 kern_time.c --- kern_time.c 12 Aug 2020 15:31:27 - 1.140 +++ kern_time.c 1 Sep 2020 15:19:07 - @@ -491,7 +491,7 @@ out: struct mutex itimer_mtx = MUTEX_INITIALIZER(IPL_CLOCK); /* - * Get value of an interval timer. The process virtual and + * Get and/or set value of an interval timer. The process virtual and * profiling virtual time timers are kept internally in the * way they are specified externally: in time until they expire. * @@ -509,6 +509,63 @@ struct mutex itimer_mtx = MUTEX_INITIALI * real time timers .it_interval. Rather, we compute the next time in * absolute time the timer should go off. */ +void +setitimer(int which, struct itimerval *itv, struct itimerval *olditv) +{ + struct itimerspec its, oldits; + struct itimerspec *itimer; + struct process *pr; + int timo; + + KASSERT(which >= ITIMER_REAL && which <= ITIMER_PROF); + + pr = curproc->p_p; + itimer = >ps_timer[which]; + + if (itv != NULL) { + TIMEVAL_TO_TIMESPEC(>it_value, _value); + TIMEVAL_TO_TIMESPEC(>it_interval, _interval); + } + + if (which != ITIMER_REAL) + mtx_enter(_mtx); + + if (olditv != NULL) + oldits = *itimer; + if (itv != NULL) { + if (which == ITIMER_REAL) { + struct timespec cts; + getnanouptime(); + if (timespecisset(_value)) { + timo = tstohz(_value); + timeout_add(>ps_realit_to, timo); + timespecadd(_value, , _value); + } else + timeout_del(>ps_realit_to); + } + *itimer = its; + } + + if (which != ITIMER_REAL) + mtx_leave(_mtx); + + if (olditv != NULL) { + if (which == ITIMER_REAL) { + struct timespec now; + getnanouptime(); + if (timespecisset(_value)) { + if (timespeccmp(_value, , <)) + timespecclear(_value); + else + timespecsub(_value, , + _value); + } + } + TIMESPEC_TO_TIMEVAL(>it_value, _value); + TIMESPEC_TO_TIMEVAL(>it_interval, _interval); + } +} + int sys_getitimer(struct proc *p, void *v, register_t *retval) { @@ -516,44 +573,16 @@ sys_getitimer(struct proc *p, void *v, r syscallarg(int) which; syscallarg(struct itimerval *) itv; } */ *uap = v; - struct itimerspec its; struct itimerval aitv; - struct itimerspec *itimer; int which; which = SCARG(uap, which); if (which < ITIMER_REAL || which > ITIMER_PROF) return (EINVAL); - itimer = >p_p->ps_timer[which]; memset(, 0, sizeof(aitv)); - if (which != ITIMER_REAL) - mtx_enter(_mtx); - its = *itimer; - if (which != ITIMER_REAL) - mtx_leave(_mtx); - - if (which == ITIMER_REAL) { - struct timespec now; - - getnanouptime(); - /* -* Convert from absolute to relative time in .it_value -* part of real time timer. If time for real time timer -* has passed return 0, else return difference between -* current time and time for the timer to go off. -*/ - if (timespecisset(_value)) { - if (timespeccmp(_value, , <)) - timespecclear(_value); - else - timespecsub(_value, , - _value); - } - } - TIMESPEC_TO_TIMEVAL(_value, _value); -
Re: shrinking and growing reallocs: a theoretical? bad case for performance
On 2020/08/31 08:39, Otto Moerbeek wrote: > A question from Theo made me think about realloc and come up with a > particular bad case for performance. I do not know if it happens in > practice, but it was easy to create a test program to hit the case. Not very scientific testing (a single attempt at building one port), but this seems to help quite a lot when compiling programs written in rust. I encourage others to test the diff :-)